1 //! Lowering rules for AArch64.
2 //!
3 //! TODO: opportunities for better code generation:
4 //!
5 //! - Smarter use of addressing modes. Recognize a+SCALE*b patterns; recognize
6 //! and incorporate sign/zero extension on indices. Recognize pre/post-index
7 //! opportunities.
8 //!
9 //! - Floating-point immediates (FIMM instruction).
10
11 use crate::ir::condcodes::{FloatCC, IntCC};
12 use crate::ir::types::*;
13 use crate::ir::Inst as IRInst;
14 use crate::ir::{InstructionData, Opcode, TrapCode, Type};
15 use crate::machinst::lower::*;
16 use crate::machinst::*;
17 use crate::CodegenResult;
18
19 use crate::isa::aarch64::inst::*;
20 use crate::isa::aarch64::AArch64Backend;
21
22 use super::lower_inst;
23
24 use log::debug;
25 use regalloc::{Reg, RegClass, Writable};
26
27 //============================================================================
28 // Result enum types.
29 //
30 // Lowering of a given value results in one of these enums, depending on the
31 // modes in which we can accept the value.
32
/// A lowering result: register, register-shift. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRS {
    /// Plain register operand.
    Reg(Reg),
    /// Register operand with a shift folded into the consuming instruction.
    RegShift(Reg, ShiftOpAndAmt),
}
40
/// A lowering result: register, register-shift, register-extend. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRSE {
    /// Plain register operand.
    Reg(Reg),
    /// Register operand with a folded shift.
    RegShift(Reg, ShiftOpAndAmt),
    /// Register operand with a folded zero-/sign-extension.
    RegExtend(Reg, ExtendOp),
}
49
50 impl ResultRSE {
from_rs(rs: ResultRS) -> ResultRSE51 fn from_rs(rs: ResultRS) -> ResultRSE {
52 match rs {
53 ResultRS::Reg(r) => ResultRSE::Reg(r),
54 ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
55 }
56 }
57 }
58
/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSEImm12 {
    /// Plain register operand.
    Reg(Reg),
    /// Register operand with a folded shift.
    RegShift(Reg, ShiftOpAndAmt),
    /// Register operand with a folded zero-/sign-extension.
    RegExtend(Reg, ExtendOp),
    /// 12-bit immediate operand (AArch64 arithmetic-immediate form).
    Imm12(Imm12),
}
69
70 impl ResultRSEImm12 {
from_rse(rse: ResultRSE) -> ResultRSEImm1271 fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
72 match rse {
73 ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
74 ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
75 ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
76 }
77 }
78 }
79
/// A lowering result: register, register-shift, or logical immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSImmLogic {
    /// Plain register operand.
    Reg(Reg),
    /// Register operand with a folded shift.
    RegShift(Reg, ShiftOpAndAmt),
    /// Logical immediate operand (AArch64 bitmask-immediate form).
    ImmLogic(ImmLogic),
}
89
90 impl ResultRSImmLogic {
from_rs(rse: ResultRS) -> ResultRSImmLogic91 fn from_rs(rse: ResultRS) -> ResultRSImmLogic {
92 match rse {
93 ResultRS::Reg(r) => ResultRSImmLogic::Reg(r),
94 ResultRS::RegShift(r, s) => ResultRSImmLogic::RegShift(r, s),
95 }
96 }
97 }
98
/// A lowering result: register or immediate shift amount (arg to a shift op).
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRegImmShift {
    /// Shift amount held in a register.
    Reg(Reg),
    /// Shift amount as an immediate.
    ImmShift(ImmShift),
}
107
108 //============================================================================
109 // Instruction input "slots".
110 //
111 // We use these types to refer to operand numbers, and result numbers, together
112 // with the associated instruction, in a type-safe way.
113
/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnInput {
    /// The IR instruction.
    pub(crate) insn: IRInst,
    /// Index of the input (operand) on `insn`.
    pub(crate) input: usize,
}
120
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnOutput {
    /// The IR instruction.
    pub(crate) insn: IRInst,
    /// Index of the output (result) on `insn`.
    pub(crate) output: usize,
}
127
128 //============================================================================
129 // Lowering: convert instruction inputs to forms that we can use.
130
131 /// Lower an instruction input to a 64-bit constant, if possible.
input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64>132 pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
133 let input = ctx.get_input(input.insn, input.input);
134 input.constant
135 }
136
137 /// Lower an instruction input to a constant register-shift amount, if possible.
input_to_shiftimm<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, ) -> Option<ShiftOpShiftImm>138 pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
139 ctx: &mut C,
140 input: InsnInput,
141 ) -> Option<ShiftOpShiftImm> {
142 input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
143 }
144
output_to_const_f128<C: LowerCtx<I = Inst>>( ctx: &mut C, out: InsnOutput, ) -> Option<u128>145 pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
146 ctx: &mut C,
147 out: InsnOutput,
148 ) -> Option<u128> {
149 if out.output > 0 {
150 None
151 } else {
152 let inst_data = ctx.data(out.insn);
153
154 match inst_data {
155 &InstructionData::UnaryConst {
156 opcode: _,
157 constant_handle,
158 } => {
159 let mut bytes = [0u8; 16];
160 let c = ctx.get_constant_data(constant_handle).clone().into_vec();
161 assert_eq!(c.len(), 16);
162 bytes.copy_from_slice(&c);
163 Some(u128::from_le_bytes(bytes))
164 }
165 _ => None,
166 }
167 }
168 }
169
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `input_to_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NarrowValueMode {
    /// No extension required; high bits may be undefined.
    None,
    /// Zero-extend to 32 bits if original is < 32 bits.
    ZeroExtend32,
    /// Sign-extend to 32 bits if original is < 32 bits.
    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
    /// Sign-extend to 64 bits if original is < 64 bits.
    SignExtend64,
}
184
185 impl NarrowValueMode {
is_32bit(&self) -> bool186 fn is_32bit(&self) -> bool {
187 match self {
188 NarrowValueMode::None => false,
189 NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
190 NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
191 }
192 }
193 }
194
/// Allocate a register for an instruction output and return it.
///
/// Delegates to the lowering context, which picks the register for output
/// `out.output` of instruction `out.insn`.
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
    ctx.get_output(out.insn, out.output)
}
199
/// Lower an instruction input to a reg.
///
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type. If extended, the value is
/// always extended to 64 bits, for simplicity.
pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> Reg {
    debug!("input_to_reg: input {:?}", input);
    let ty = ctx.input_ty(input.insn, input.input);
    let from_bits = ty_bits(ty) as u8;
    let inputs = ctx.get_input(input.insn, input.input);
    // Materialize the value: rematerialize a known constant at this use
    // site, or else use the vreg carrying the producer's result.
    let in_reg = if let Some(c) = inputs.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
        for inst in Inst::gen_constant(to_reg, c, ty).into_iter() {
            ctx.emit(inst);
        }
        to_reg.to_reg()
    } else {
        // Not a constant: record the use and take the input's register.
        ctx.use_input_reg(inputs);
        inputs.reg
    };

    // Apply the requested extension, if any. Values already at or above the
    // target width pass through unchanged.
    match (narrow_mode, from_bits) {
        (NarrowValueMode::None, _) => in_reg,
        (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
            // NOTE(review): the temps in all of these arms are allocated as
            // (RegClass::I64, I32), even for extensions to 64 bits below —
            // presumably only the register class matters to alloc_tmp here;
            // confirm against `LowerCtx::alloc_tmp`.
            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: false,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        (NarrowValueMode::SignExtend32, n) if n < 32 => {
            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        // Already exactly 32 bits: nothing to do for the 32-bit modes.
        (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,

        (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
            if inputs.constant.is_some() {
                // Constants are zero-extended to full 64-bit width on load already.
                in_reg
            } else {
                let tmp = ctx.alloc_tmp(RegClass::I64, I32);
                ctx.emit(Inst::Extend {
                    rd: tmp,
                    rn: in_reg,
                    signed: false,
                    from_bits,
                    to_bits: 64,
                });
                tmp.to_reg()
            }
        }
        (NarrowValueMode::SignExtend64, n) if n < 64 => {
            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 64,
            });
            tmp.to_reg()
        }
        // Full-width (64-bit) and vector (128-bit) values are never extended.
        (_, 64) => in_reg,
        (_, 128) => in_reg,

        _ => panic!(
            "Unsupported input width: input ty {} bits {} mode {:?}",
            ty, from_bits, narrow_mode
        ),
    }
}
288
289 /// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
290 ///
291 /// The `narrow_mode` flag indicates whether the consumer of this value needs
292 /// the high bits clear. For many operations, such as an add/sub/mul or any
293 /// bitwise logical operation, the low-bit results depend only on the low-bit
294 /// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
295 /// value is stored in the low 8 bits of the register and the high 24 bits are
296 /// undefined. If the op truly needs the high N bits clear (such as for a
297 /// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
298 /// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
299 /// register will be provided the extended value.
input_to_rs<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> ResultRS300 fn input_to_rs<C: LowerCtx<I = Inst>>(
301 ctx: &mut C,
302 input: InsnInput,
303 narrow_mode: NarrowValueMode,
304 ) -> ResultRS {
305 let inputs = ctx.get_input(input.insn, input.input);
306 if let Some((insn, 0)) = inputs.inst {
307 let op = ctx.data(insn).opcode();
308
309 if op == Opcode::Ishl {
310 let shiftee = InsnInput { insn, input: 0 };
311 let shift_amt = InsnInput { insn, input: 1 };
312
313 // Can we get the shift amount as an immediate?
314 if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
315 let reg = input_to_reg(ctx, shiftee, narrow_mode);
316 return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
317 }
318 }
319 }
320
321 ResultRS::Reg(input_to_reg(ctx, input, narrow_mode))
322 }
323
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// See note on `input_to_rs` for a description of `narrow_mode`.
fn input_to_rse<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSE {
    let inputs = ctx.get_input(input.insn, input.input);
    // Only consider merging with the producer if this value is its first
    // (index-0) output.
    if let Some((insn, 0)) = inputs.inst {
        let op = ctx.data(insn).opcode();
        let out_ty = ctx.output_ty(insn, 0);
        let out_bits = ty_bits(out_ty);

        // If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
        // then get the result into a register and return an Extend-mode operand on
        // that register.
        if narrow_mode != NarrowValueMode::None
            && ((narrow_mode.is_32bit() && out_bits < 32)
                || (!narrow_mode.is_32bit() && out_bits < 64))
        {
            // NOTE(review): this lowers input 0 of the *producing* instruction
            // rather than the original `input` value itself; that is only
            // equivalent if the producer passes its input 0 through (as the
            // Uextend/Sextend case below does). Confirm this branch is not
            // reachable for other producers.
            let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            // Choose the extend op from the requested signedness and the
            // producer's result width (1/8 -> byte, 16 -> half, 32 -> word).
            let extendop = match (narrow_mode, out_bits) {
                (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
                    ExtendOp::SXTH
                }
                (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
                    ExtendOp::UXTH
                }
                (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
                (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
                _ => unreachable!(),
            };
            return ResultRSE::RegExtend(reg, extendop);
        }

        // Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
        if op == Opcode::Uextend || op == Opcode::Sextend {
            assert!(out_bits == 32 || out_bits == 64);
            let sign_extend = op == Opcode::Sextend;
            let inner_ty = ctx.input_ty(insn, 0);
            let inner_bits = ty_bits(inner_ty);
            assert!(inner_bits < out_bits);
            let extendop = match (sign_extend, inner_bits) {
                (true, 1) => ExtendOp::SXTB,
                (false, 1) => ExtendOp::UXTB,
                (true, 8) => ExtendOp::SXTB,
                (false, 8) => ExtendOp::UXTB,
                (true, 16) => ExtendOp::SXTH,
                (false, 16) => ExtendOp::UXTH,
                (true, 32) => ExtendOp::SXTW,
                (false, 32) => ExtendOp::UXTW,
                _ => unreachable!(),
            };
            // Fold the extend into the consuming instruction: take the
            // extend's *source* register and attach the extend op.
            let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            return ResultRSE::RegExtend(reg, extendop);
        }
    }

    // Fall back to the reg/reg-shift forms.
    ResultRSE::from_rs(input_to_rs(ctx, input, narrow_mode))
}
399
input_to_rse_imm12<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> ResultRSEImm12400 pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
401 ctx: &mut C,
402 input: InsnInput,
403 narrow_mode: NarrowValueMode,
404 ) -> ResultRSEImm12 {
405 if let Some(imm_value) = input_to_const(ctx, input) {
406 if let Some(i) = Imm12::maybe_from_u64(imm_value) {
407 return ResultRSEImm12::Imm12(i);
408 }
409 }
410
411 ResultRSEImm12::from_rse(input_to_rse(ctx, input, narrow_mode))
412 }
413
input_to_rs_immlogic<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> ResultRSImmLogic414 pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
415 ctx: &mut C,
416 input: InsnInput,
417 narrow_mode: NarrowValueMode,
418 ) -> ResultRSImmLogic {
419 if let Some(imm_value) = input_to_const(ctx, input) {
420 let ty = ctx.input_ty(input.insn, input.input);
421 let ty = if ty_bits(ty) < 32 { I32 } else { ty };
422 if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
423 return ResultRSImmLogic::ImmLogic(i);
424 }
425 }
426
427 ResultRSImmLogic::from_rs(input_to_rs(ctx, input, narrow_mode))
428 }
429
input_to_reg_immshift<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, ) -> ResultRegImmShift430 pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
431 ctx: &mut C,
432 input: InsnInput,
433 ) -> ResultRegImmShift {
434 if let Some(imm_value) = input_to_const(ctx, input) {
435 if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
436 return ResultRegImmShift::ImmShift(immshift);
437 }
438 }
439
440 ResultRegImmShift::Reg(input_to_reg(ctx, input, NarrowValueMode::None))
441 }
442
443 //============================================================================
444 // ALU instruction constructors.
445
alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst446 pub(crate) fn alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst {
447 match rm {
448 ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
449 alu_op: op,
450 rd,
451 rn,
452 imm12,
453 },
454 ResultRSEImm12::Reg(rm) => Inst::AluRRR {
455 alu_op: op,
456 rd,
457 rn,
458 rm,
459 },
460 ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
461 alu_op: op,
462 rd,
463 rn,
464 rm,
465 shiftop,
466 },
467 ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
468 alu_op: op,
469 rd,
470 rn,
471 rm,
472 extendop,
473 },
474 }
475 }
476
alu_inst_immlogic( op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSImmLogic, ) -> Inst477 pub(crate) fn alu_inst_immlogic(
478 op: ALUOp,
479 rd: Writable<Reg>,
480 rn: Reg,
481 rm: ResultRSImmLogic,
482 ) -> Inst {
483 match rm {
484 ResultRSImmLogic::ImmLogic(imml) => Inst::AluRRImmLogic {
485 alu_op: op,
486 rd,
487 rn,
488 imml,
489 },
490 ResultRSImmLogic::Reg(rm) => Inst::AluRRR {
491 alu_op: op,
492 rd,
493 rn,
494 rm,
495 },
496 ResultRSImmLogic::RegShift(rm, shiftop) => Inst::AluRRRShift {
497 alu_op: op,
498 rd,
499 rn,
500 rm,
501 shiftop,
502 },
503 }
504 }
505
alu_inst_immshift( op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRegImmShift, ) -> Inst506 pub(crate) fn alu_inst_immshift(
507 op: ALUOp,
508 rd: Writable<Reg>,
509 rn: Reg,
510 rm: ResultRegImmShift,
511 ) -> Inst {
512 match rm {
513 ResultRegImmShift::ImmShift(immshift) => Inst::AluRRImmShift {
514 alu_op: op,
515 rd,
516 rn,
517 immshift,
518 },
519 ResultRegImmShift::Reg(rm) => Inst::AluRRR {
520 alu_op: op,
521 rd,
522 rn,
523 rm,
524 },
525 }
526 }
527
528 //============================================================================
529 // Lowering: addressing mode support. Takes instruction directly, rather
530 // than an `InsnInput`, to do more introspection.
531
532 /// Lower the address of a load or store.
lower_address<C: LowerCtx<I = Inst>>( ctx: &mut C, elem_ty: Type, addends: &[InsnInput], offset: i32, ) -> MemArg533 pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
534 ctx: &mut C,
535 elem_ty: Type,
536 addends: &[InsnInput],
537 offset: i32,
538 ) -> MemArg {
539 // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
540 // mul instructions (Load/StoreComplex don't include scale factors).
541
542 // Handle one reg and offset.
543 if addends.len() == 1 {
544 let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
545 return MemArg::RegOffset(reg, offset as i64, elem_ty);
546 }
547
548 // Handle two regs and a zero offset, if possible.
549 if addends.len() == 2 && offset == 0 {
550 let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
551 let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
552 return MemArg::reg_plus_reg(ra, rb);
553 }
554
555 // Otherwise, generate add instructions.
556 let addr = ctx.alloc_tmp(RegClass::I64, I64);
557
558 // Get the const into a reg.
559 lower_constant_u64(ctx, addr.clone(), offset as u64);
560
561 // Add each addend to the address.
562 for addend in addends {
563 let reg = input_to_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
564
565 // In an addition, the stack register is the zero register, so divert it to another
566 // register just before doing the actual add.
567 let reg = if reg == stack_reg() {
568 let tmp = ctx.alloc_tmp(RegClass::I64, I64);
569 ctx.emit(Inst::Mov {
570 rd: tmp,
571 rm: stack_reg(),
572 });
573 tmp.to_reg()
574 } else {
575 reg
576 };
577
578 ctx.emit(Inst::AluRRR {
579 alu_op: ALUOp::Add64,
580 rd: addr.clone(),
581 rn: addr.to_reg(),
582 rm: reg.clone(),
583 });
584 }
585
586 MemArg::reg(addr.to_reg())
587 }
588
lower_constant_u64<C: LowerCtx<I = Inst>>( ctx: &mut C, rd: Writable<Reg>, value: u64, )589 pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(
590 ctx: &mut C,
591 rd: Writable<Reg>,
592 value: u64,
593 ) {
594 for inst in Inst::load_constant(rd, value) {
595 ctx.emit(inst);
596 }
597 }
598
/// Emit the instruction that loads a 32-bit floating-point constant into `rd`.
pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f32,
) {
    ctx.emit(Inst::load_fp_constant32(rd, value));
}
606
/// Emit the instruction that loads a 64-bit floating-point constant into `rd`.
pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f64,
) {
    ctx.emit(Inst::load_fp_constant64(rd, value));
}
614
/// Emit the instruction that loads a 128-bit (vector) constant into `rd`.
pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u128,
) {
    ctx.emit(Inst::load_fp_constant128(rd, value));
}
622
/// Map a Cranelift integer condition code to the corresponding AArch64
/// condition code. The mapping is one-to-one; signedness is reflected in the
/// choice of Hs/Hi/Ls/Lo (unsigned) vs. Ge/Gt/Le/Lt (signed).
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
    match cc {
        IntCC::Equal => Cond::Eq,
        IntCC::NotEqual => Cond::Ne,
        IntCC::SignedGreaterThanOrEqual => Cond::Ge,
        IntCC::SignedGreaterThan => Cond::Gt,
        IntCC::SignedLessThanOrEqual => Cond::Le,
        IntCC::SignedLessThan => Cond::Lt,
        IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
        IntCC::UnsignedGreaterThan => Cond::Hi,
        IntCC::UnsignedLessThanOrEqual => Cond::Ls,
        IntCC::UnsignedLessThan => Cond::Lo,
        IntCC::Overflow => Cond::Vs,
        IntCC::NotOverflow => Cond::Vc,
    }
}
639
/// Map a Cranelift floating-point condition code to an AArch64 condition,
/// assuming the flags were set by an FCMP. Condition codes that require a
/// disjunction of NZCV states with no single AArch64 condition (e.g.
/// `OrderedNotEqual`, the `UnorderedOr*` family) are currently unimplemented.
pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
    // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
    // The FCMP instruction sets:
    //               NZCV
    // - PCSR.NZCV = 0011 on UN (unordered),
    //               0110 on EQ,
    //               1000 on LT,
    //               0010 on GT.
    match cc {
        // EQ | LT | GT. Vc => V clear.
        FloatCC::Ordered => Cond::Vc,
        // UN. Vs => V set.
        FloatCC::Unordered => Cond::Vs,
        // EQ. Eq => Z set.
        FloatCC::Equal => Cond::Eq,
        // UN | LT | GT. Ne => Z clear.
        FloatCC::NotEqual => Cond::Ne,
        // LT | GT.
        FloatCC::OrderedNotEqual => unimplemented!(),
        //  UN | EQ
        FloatCC::UnorderedOrEqual => unimplemented!(),
        // LT. Mi => N set.
        FloatCC::LessThan => Cond::Mi,
        // LT | EQ. Ls => C clear or Z set.
        FloatCC::LessThanOrEqual => Cond::Ls,
        // GT. Gt => Z clear, N = V.
        FloatCC::GreaterThan => Cond::Gt,
        // GT | EQ. Ge => N = V.
        FloatCC::GreaterThanOrEqual => Cond::Ge,
        // UN | LT
        FloatCC::UnorderedOrLessThan => unimplemented!(),
        // UN | LT | EQ
        FloatCC::UnorderedOrLessThanOrEqual => unimplemented!(),
        // UN | GT
        FloatCC::UnorderedOrGreaterThan => unimplemented!(),
        // UN | GT | EQ
        FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(),
    }
}
679
680 /// Determines whether this condcode interprets inputs as signed or
681 /// unsigned. See the documentation for the `icmp` instruction in
682 /// cranelift-codegen/meta/src/shared/instructions.rs for further insights
683 /// into this.
condcode_is_signed(cc: IntCC) -> bool684 pub fn condcode_is_signed(cc: IntCC) -> bool {
685 match cc {
686 IntCC::Equal => false,
687 IntCC::NotEqual => false,
688 IntCC::SignedGreaterThanOrEqual => true,
689 IntCC::SignedGreaterThan => true,
690 IntCC::SignedLessThanOrEqual => true,
691 IntCC::SignedLessThan => true,
692 IntCC::UnsignedGreaterThanOrEqual => false,
693 IntCC::UnsignedGreaterThan => false,
694 IntCC::UnsignedLessThanOrEqual => false,
695 IntCC::UnsignedLessThan => false,
696 IntCC::Overflow => true,
697 IntCC::NotOverflow => true,
698 }
699 }
700
701 //=============================================================================
702 // Helpers for instruction lowering.
703
/// Returns the size (in bits) of a given type.
///
/// Flags types (IFLAGS/FFLAGS) report 32 bits; the vector types handled so
/// far (I8X16/B8X16) report 128. Panics on any other type.
pub fn ty_bits(ty: Type) -> usize {
    match ty {
        B1 => 1,
        B8 | I8 => 8,
        B16 | I16 => 16,
        B32 | I32 | F32 => 32,
        B64 | I64 | F64 => 64,
        B128 | I128 => 128,
        IFLAGS | FFLAGS => 32,
        I8X16 | B8X16 => 128,
        _ => panic!("ty_bits() on unknown type: {:?}", ty),
    }
}
718
ty_is_int(ty: Type) -> bool719 pub(crate) fn ty_is_int(ty: Type) -> bool {
720 match ty {
721 B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
722 F32 | F64 | B128 | I128 | I8X16 => false,
723 IFLAGS | FFLAGS => panic!("Unexpected flags type"),
724 _ => panic!("ty_is_int() on unknown type: {:?}", ty),
725 }
726 }
727
/// Returns true if the type is not a scalar integer type.
///
/// Note: defined as the complement of `ty_is_int`, so the 128-bit and vector
/// types (B128, I128, I8X16) also report true here, not only F32/F64.
pub(crate) fn ty_is_float(ty: Type) -> bool {
    !ty_is_int(ty)
}
731
choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T732 pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
733 let bits = ty_bits(ty);
734 if bits <= 32 {
735 op32
736 } else if bits == 64 {
737 op64
738 } else {
739 panic!("choose_32_64 on > 64 bits!")
740 }
741 }
742
/// Extract the immediate offset from a load/store-style instruction's data,
/// for the instruction formats that carry an `offset` field; `None` otherwise.
pub(crate) fn ldst_offset(data: &InstructionData) -> Option<i32> {
    match data {
        &InstructionData::Load { offset, .. }
        | &InstructionData::StackLoad { offset, .. }
        | &InstructionData::LoadComplex { offset, .. }
        | &InstructionData::Store { offset, .. }
        | &InstructionData::StackStore { offset, .. }
        | &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
        _ => None,
    }
}
754
/// Extract the integer condition code from an instruction's data, for the
/// formats that carry one; `None` otherwise.
pub(crate) fn inst_condcode(data: &InstructionData) -> Option<IntCC> {
    match data {
        &InstructionData::IntCond { cond, .. }
        | &InstructionData::BranchIcmp { cond, .. }
        | &InstructionData::IntCompare { cond, .. }
        | &InstructionData::IntCondTrap { cond, .. }
        | &InstructionData::BranchInt { cond, .. }
        | &InstructionData::IntSelect { cond, .. }
        | &InstructionData::IntCompareImm { cond, .. } => Some(cond),
        _ => None,
    }
}
767
/// Extract the floating-point condition code from an instruction's data, for
/// the formats that carry one; `None` otherwise.
pub(crate) fn inst_fp_condcode(data: &InstructionData) -> Option<FloatCC> {
    match data {
        &InstructionData::BranchFloat { cond, .. }
        | &InstructionData::FloatCompare { cond, .. }
        | &InstructionData::FloatCond { cond, .. }
        | &InstructionData::FloatCondTrap { cond, .. } => Some(cond),
        _ => None,
    }
}
777
/// Extract the trap code from a trapping instruction's data, for the formats
/// that carry one; `None` otherwise.
pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
    match data {
        &InstructionData::Trap { code, .. }
        | &InstructionData::CondTrap { code, .. }
        | &InstructionData::IntCondTrap { code, .. }
        | &InstructionData::FloatCondTrap { code, .. } => Some(code),
        _ => None,
    }
}
787
788 /// Checks for an instance of `op` feeding the given input.
maybe_input_insn<C: LowerCtx<I = Inst>>( c: &mut C, input: InsnInput, op: Opcode, ) -> Option<IRInst>789 pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
790 c: &mut C,
791 input: InsnInput,
792 op: Opcode,
793 ) -> Option<IRInst> {
794 let inputs = c.get_input(input.insn, input.input);
795 debug!(
796 "maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
797 input, inputs, op
798 );
799 if let Some((src_inst, _)) = inputs.inst {
800 let data = c.data(src_inst);
801 debug!(" -> input inst {:?}", data);
802 if data.opcode() == op {
803 return Some(src_inst);
804 }
805 }
806 None
807 }
808
809 /// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
810 /// Bint or a bitcast).
811 ///
812 /// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
813 /// a bit more generic.
maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>( c: &mut C, input: InsnInput, op: Opcode, conv: Opcode, ) -> Option<IRInst>814 pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
815 c: &mut C,
816 input: InsnInput,
817 op: Opcode,
818 conv: Opcode,
819 ) -> Option<IRInst> {
820 let inputs = c.get_input(input.insn, input.input);
821 if let Some((src_inst, _)) = inputs.inst {
822 let data = c.data(src_inst);
823 if data.opcode() == op {
824 return Some(src_inst);
825 }
826 if data.opcode() == conv {
827 let inputs = c.get_input(src_inst, 0);
828 if let Some((src_inst, _)) = inputs.inst {
829 let data = c.data(src_inst);
830 if data.opcode() == op {
831 return Some(src_inst);
832 }
833 }
834 }
835 }
836 None
837 }
838
lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>( ctx: &mut C, insn: IRInst, is_signed: bool, )839 pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
840 ctx: &mut C,
841 insn: IRInst,
842 is_signed: bool,
843 ) {
844 debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
845 let ty = ctx.input_ty(insn, 0);
846 let bits = ty_bits(ty);
847 let narrow_mode = match (bits <= 32, is_signed) {
848 (true, true) => NarrowValueMode::SignExtend32,
849 (true, false) => NarrowValueMode::ZeroExtend32,
850 (false, true) => NarrowValueMode::SignExtend64,
851 (false, false) => NarrowValueMode::ZeroExtend64,
852 };
853 let inputs = [
854 InsnInput {
855 insn: insn,
856 input: 0,
857 },
858 InsnInput {
859 insn: insn,
860 input: 1,
861 },
862 ];
863 let ty = ctx.input_ty(insn, 0);
864 let rn = input_to_reg(ctx, inputs[0], narrow_mode);
865 let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
866 debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
867 let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
868 let rd = writable_zero_reg();
869 ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
870 }
871
lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst)872 pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
873 let ty = ctx.input_ty(insn, 0);
874 let bits = ty_bits(ty);
875 let inputs = [
876 InsnInput {
877 insn: insn,
878 input: 0,
879 },
880 InsnInput {
881 insn: insn,
882 input: 1,
883 },
884 ];
885 let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
886 let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
887 match bits {
888 32 => {
889 ctx.emit(Inst::FpuCmp32 { rn, rm });
890 }
891 64 => {
892 ctx.emit(Inst::FpuCmp64 { rn, rm });
893 }
894 _ => panic!("Unknown float size"),
895 }
896 }
897
898 //=============================================================================
899 // Lowering-backend trait implementation.
900
impl LowerBackend for AArch64Backend {
    type MInst = Inst;

    /// Lower a single IR instruction to AArch64 machine instructions;
    /// delegates to `lower_inst::lower_insn_to_regs`.
    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
        lower_inst::lower_insn_to_regs(ctx, ir_inst)
    }

    /// Lower the group of branch instructions terminating a block, given the
    /// resolved targets and optional fallthrough; delegates to
    /// `lower_inst::lower_branch`.
    fn lower_branch_group<C: LowerCtx<I = Inst>>(
        &self,
        ctx: &mut C,
        branches: &[IRInst],
        targets: &[MachLabel],
        fallthrough: Option<MachLabel>,
    ) -> CodegenResult<()> {
        lower_inst::lower_branch(ctx, branches, targets, fallthrough)
    }

    /// This backend exposes a pinned register (`PINNED_REG`).
    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(xreg(PINNED_REG))
    }
}
922