//! Lowering rules for AArch64.
//!
//! TODO: opportunities for better code generation:
//!
//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns; recognize
//!   and incorporate sign/zero extension on indices. Recognize pre/post-index
//!   opportunities.
//!
//! - Floating-point immediates (FIMM instruction).

use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;

use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;

use super::lower_inst;

use log::debug;
use regalloc::{Reg, RegClass, Writable};

//============================================================================
// Result enum types.
//
// Lowering of a given value results in one of these enums, depending on the
// modes in which we can accept the value.

/// A lowering result: register, register-shift. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRS {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
}

/// A lowering result: register, register-shift, register-extend. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRSE {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    RegExtend(Reg, ExtendOp),
}

impl ResultRSE {
    fn from_rs(rs: ResultRS) -> ResultRSE {
        match rs {
            ResultRS::Reg(r) => ResultRSE::Reg(r),
            ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
        }
    }
}

/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSEImm12 {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    RegExtend(Reg, ExtendOp),
    Imm12(Imm12),
}

impl ResultRSEImm12 {
    fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
        match rse {
            ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
            ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
            ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
        }
    }
}

/// A lowering result: register, register-shift, or logical immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSImmLogic {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    ImmLogic(ImmLogic),
}

impl ResultRSImmLogic {
    fn from_rs(rse: ResultRS) -> ResultRSImmLogic {
        match rse {
            ResultRS::Reg(r) => ResultRSImmLogic::Reg(r),
            ResultRS::RegShift(r, s) => ResultRSImmLogic::RegShift(r, s),
        }
    }
}

/// A lowering result: register or immediate shift amount (arg to a shift op).
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRegImmShift {
    Reg(Reg),
    ImmShift(ImmShift),
}

//============================================================================
// Instruction input "slots".
//
// We use these types to refer to operand numbers, and result numbers, together
// with the associated instruction, in a type-safe way.

/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnInput {
    pub(crate) insn: IRInst,
    pub(crate) input: usize,
}

/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnOutput {
    pub(crate) insn: IRInst,
    pub(crate) output: usize,
}

//============================================================================
// Lowering: convert instruction inputs to forms that we can use.

/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
    let input = ctx.get_input(input.insn, input.input);
    input.constant
}

/// Lower an instruction input to a constant register-shift amount, if possible.
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<ShiftOpShiftImm> {
    input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
}

pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    out: InsnOutput,
) -> Option<u128> {
    if out.output > 0 {
        None
    } else {
        let inst_data = ctx.data(out.insn);

        match inst_data {
            &InstructionData::UnaryConst {
                opcode: _,
                constant_handle,
            } => {
                let mut bytes = [0u8; 16];
                let c = ctx.get_constant_data(constant_handle).clone().into_vec();
                assert_eq!(c.len(), 16);
                bytes.copy_from_slice(&c);
                Some(u128::from_le_bytes(bytes))
            }
            _ => None,
        }
    }
}

/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `input_to_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NarrowValueMode {
    None,
    /// Zero-extend to 32 bits if original is < 32 bits.
    ZeroExtend32,
    /// Sign-extend to 32 bits if original is < 32 bits.
    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
    /// Sign-extend to 64 bits if original is < 64 bits.
    SignExtend64,
}
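
// Example (illustrative sketch; these call sites are hypothetical, not taken
// from this file): an 8-bit `udiv` truly consumes all 32 bits of its source
// register, so its operands must be requested with the high bits cleared,
// while an 8-bit `iadd` depends only on the low 8 bits and can take the
// register as-is:
//
//     let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend32); // udiv
//     let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);         // iadd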

impl NarrowValueMode {
    fn is_32bit(&self) -> bool {
        match self {
            NarrowValueMode::None => false,
            NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
            NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
        }
    }
}

/// Allocate a register for an instruction output and return it.
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
    ctx.get_output(out.insn, out.output)
}

/// Lower an instruction input to a reg.
///
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type: to 32 bits for the `*Extend32` modes
/// and to 64 bits for the `*Extend64` modes.
pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> Reg {
    debug!("input_to_reg: input {:?}", input);
    let ty = ctx.input_ty(input.insn, input.input);
    let from_bits = ty_bits(ty) as u8;
    let inputs = ctx.get_input(input.insn, input.input);
    let in_reg = if let Some(c) = inputs.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
        for inst in Inst::gen_constant(to_reg, c, ty).into_iter() {
            ctx.emit(inst);
        }
        to_reg.to_reg()
    } else {
        ctx.use_input_reg(inputs);
        inputs.reg
    };

    match (narrow_mode, from_bits) {
        (NarrowValueMode::None, _) => in_reg,
        (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: false,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        (NarrowValueMode::SignExtend32, n) if n < 32 => {
            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,

        (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
            if inputs.constant.is_some() {
                // Constants are zero-extended to full 64-bit width on load already.
                in_reg
            } else {
                let tmp = ctx.alloc_tmp(RegClass::I64, I32);
                ctx.emit(Inst::Extend {
                    rd: tmp,
                    rn: in_reg,
                    signed: false,
                    from_bits,
                    to_bits: 64,
                });
                tmp.to_reg()
            }
        }
        (NarrowValueMode::SignExtend64, n) if n < 64 => {
            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 64,
            });
            tmp.to_reg()
        }
        (_, 64) => in_reg,
        (_, 128) => in_reg,

        _ => panic!(
            "Unsupported input width: input ty {} bits {} mode {:?}",
            ty, from_bits, narrow_mode
        ),
    }
}

/// Lower an instruction input to a reg or reg/shift operand.
///
/// The `narrow_mode` flag indicates whether the consumer of this value needs
/// the high bits clear. For many operations, such as an add/sub/mul or any
/// bitwise logical operation, the low-bit results depend only on the low-bit
/// inputs, so e.g. we can do an 8-bit add on 32-bit registers where the 8-bit
/// value is stored in the low 8 bits of the register and the high 24 bits are
/// undefined. If the op truly needs the high N bits clear (such as for a
/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
/// register will hold the extended value.
fn input_to_rs<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRS {
    let inputs = ctx.get_input(input.insn, input.input);
    if let Some((insn, 0)) = inputs.inst {
        let op = ctx.data(insn).opcode();

        if op == Opcode::Ishl {
            let shiftee = InsnInput { insn, input: 0 };
            let shift_amt = InsnInput { insn, input: 1 };

            // Can we get the shift amount as an immediate?
            if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
                let reg = input_to_reg(ctx, shiftee, narrow_mode);
                return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
            }
        }
    }

    ResultRS::Reg(input_to_reg(ctx, input, narrow_mode))
}
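
// Sketch of the pattern this fuses (the CLIF below is illustrative):
//
//     v2 = ishl v1, v9        ; v9 is a constant 3
//     v3 = iadd v0, v2
//
// Here the `ishl` folds into the consuming ALU op's operand, so the add can
// be emitted as a single `add xD, x0, x1, LSL #3` rather than a separate
// shift followed by an add.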

/// Lower an instruction input to a reg, reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// See the note on `input_to_rs` for a description of `narrow_mode`.
fn input_to_rse<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSE {
    let inputs = ctx.get_input(input.insn, input.input);
    if let Some((insn, 0)) = inputs.inst {
        let op = ctx.data(insn).opcode();
        let out_ty = ctx.output_ty(insn, 0);
        let out_bits = ty_bits(out_ty);

        // If the out-type is narrower than the extension target width and we
        // need to zero- or sign-extend, then get the result into a register
        // and return an Extend-mode operand on that register.
        if narrow_mode != NarrowValueMode::None
            && ((narrow_mode.is_32bit() && out_bits < 32)
                || (!narrow_mode.is_32bit() && out_bits < 64))
        {
            let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            let extendop = match (narrow_mode, out_bits) {
                (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
                    ExtendOp::SXTH
                }
                (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
                    ExtendOp::UXTH
                }
                (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
                (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
                _ => unreachable!(),
            };
            return ResultRSE::RegExtend(reg, extendop);
        }

        // Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
        if op == Opcode::Uextend || op == Opcode::Sextend {
            assert!(out_bits == 32 || out_bits == 64);
            let sign_extend = op == Opcode::Sextend;
            let inner_ty = ctx.input_ty(insn, 0);
            let inner_bits = ty_bits(inner_ty);
            assert!(inner_bits < out_bits);
            let extendop = match (sign_extend, inner_bits) {
                (true, 1) => ExtendOp::SXTB,
                (false, 1) => ExtendOp::UXTB,
                (true, 8) => ExtendOp::SXTB,
                (false, 8) => ExtendOp::UXTB,
                (true, 16) => ExtendOp::SXTH,
                (false, 16) => ExtendOp::UXTH,
                (true, 32) => ExtendOp::SXTW,
                (false, 32) => ExtendOp::UXTW,
                _ => unreachable!(),
            };
            let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            return ResultRSE::RegExtend(reg, extendop);
        }
    }

    ResultRSE::from_rs(input_to_rs(ctx, input, narrow_mode))
}

pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSEImm12 {
    if let Some(imm_value) = input_to_const(ctx, input) {
        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
            return ResultRSEImm12::Imm12(i);
        }
    }

    ResultRSEImm12::from_rse(input_to_rse(ctx, input, narrow_mode))
}
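
// Worked example (illustrative): AArch64's 12-bit arithmetic immediate is an
// unsigned 12-bit value, optionally shifted left by 12. A constant input of
// 0x123 (or 0x123_000) therefore comes back as `ResultRSEImm12::Imm12(..)`
// and can fold into e.g. `add xD, xN, #0x123`, while a constant like 0x12345
// does not fit and falls through to the register/shift/extend path.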

pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSImmLogic {
    if let Some(imm_value) = input_to_const(ctx, input) {
        let ty = ctx.input_ty(input.insn, input.input);
        let ty = if ty_bits(ty) < 32 { I32 } else { ty };
        if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
            return ResultRSImmLogic::ImmLogic(i);
        }
    }

    ResultRSImmLogic::from_rs(input_to_rs(ctx, input, narrow_mode))
}

pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> ResultRegImmShift {
    if let Some(imm_value) = input_to_const(ctx, input) {
        if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
            return ResultRegImmShift::ImmShift(immshift);
        }
    }

    ResultRegImmShift::Reg(input_to_reg(ctx, input, NarrowValueMode::None))
}
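
// Example (illustrative): for a shift like `ishl v0, v9` where v9 is the
// constant 3, the amount fits an immediate shift, so this returns
// `ResultRegImmShift::ImmShift(..)` and the shift can be emitted as
// `lsl xD, x0, #3` instead of first materializing 3 in a register.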

//============================================================================
// ALU instruction constructors.

pub(crate) fn alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst {
    match rm {
        ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
            alu_op: op,
            rd,
            rn,
            imm12,
        },
        ResultRSEImm12::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
        ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
            alu_op: op,
            rd,
            rn,
            rm,
            shiftop,
        },
        ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
            alu_op: op,
            rd,
            rn,
            rm,
            extendop,
        },
    }
}

pub(crate) fn alu_inst_immlogic(
    op: ALUOp,
    rd: Writable<Reg>,
    rn: Reg,
    rm: ResultRSImmLogic,
) -> Inst {
    match rm {
        ResultRSImmLogic::ImmLogic(imml) => Inst::AluRRImmLogic {
            alu_op: op,
            rd,
            rn,
            imml,
        },
        ResultRSImmLogic::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
        ResultRSImmLogic::RegShift(rm, shiftop) => Inst::AluRRRShift {
            alu_op: op,
            rd,
            rn,
            rm,
            shiftop,
        },
    }
}

pub(crate) fn alu_inst_immshift(
    op: ALUOp,
    rd: Writable<Reg>,
    rn: Reg,
    rm: ResultRegImmShift,
) -> Inst {
    match rm {
        ResultRegImmShift::ImmShift(immshift) => Inst::AluRRImmShift {
            alu_op: op,
            rd,
            rn,
            immshift,
        },
        ResultRegImmShift::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
    }
}
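
// Typical usage (sketch; the `iadd` handler shown here is hypothetical): an
// opcode handler pairs one of the `input_to_*` helpers above with the
// matching constructor, e.g. for a 64-bit `iadd`:
//
//     let rd = output_to_reg(ctx, outputs[0]);
//     let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
//     let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
//     ctx.emit(alu_inst_imm12(ALUOp::Add64, rd, rn, rm));
//
// `lower_icmp_or_ifcmp_to_flags` below follows exactly this shape, with
// `ALUOp::SubS32`/`SubS64` and the zero register as `rd`.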

//============================================================================
// Lowering: addressing mode support. Takes instruction directly, rather
// than an `InsnInput`, to do more introspection.

/// Lower the address of a load or store.
pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    elem_ty: Type,
    addends: &[InsnInput],
    offset: i32,
) -> MemArg {
    // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
    // mul instructions (Load/StoreComplex don't include scale factors).

    // Handle one reg and offset.
    if addends.len() == 1 {
        let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
        return MemArg::RegOffset(reg, offset as i64, elem_ty);
    }

    // Handle two regs and a zero offset, if possible.
    if addends.len() == 2 && offset == 0 {
        let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
        let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
        return MemArg::reg_plus_reg(ra, rb);
    }

    // Otherwise, generate add instructions.
    let addr = ctx.alloc_tmp(RegClass::I64, I64);

    // Get the const into a reg.
    lower_constant_u64(ctx, addr.clone(), offset as u64);

    // Add each addend to the address.
    for addend in addends {
        let reg = input_to_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);

        // In the register-register ADD, register number 31 encodes the zero
        // register rather than SP, so divert SP to another register just
        // before doing the actual add.
        let reg = if reg == stack_reg() {
            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
            ctx.emit(Inst::Mov {
                rd: tmp,
                rm: stack_reg(),
            });
            tmp.to_reg()
        } else {
            reg
        };

        ctx.emit(Inst::AluRRR {
            alu_op: ALUOp::Add64,
            rd: addr.clone(),
            rn: addr.to_reg(),
            rm: reg.clone(),
        });
    }

    MemArg::reg(addr.to_reg())
}
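
// Illustrative summary of the three cases above (registers are hypothetical):
//
//     [v0 + 16]       -> MemArg::RegOffset(x0, 16, ty)   (one addend)
//     [v0 + v1]       -> MemArg::reg_plus_reg(x0, x1)    (two addends, offset 0)
//     [v0 + v1 + 16]  -> load 16 into a temp, add each addend, MemArg::reg(tmp)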

pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u64,
) {
    for inst in Inst::load_constant(rd, value) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f32,
) {
    ctx.emit(Inst::load_fp_constant32(rd, value));
}

pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f64,
) {
    ctx.emit(Inst::load_fp_constant64(rd, value));
}

pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u128,
) {
    ctx.emit(Inst::load_fp_constant128(rd, value));
}

pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
    match cc {
        IntCC::Equal => Cond::Eq,
        IntCC::NotEqual => Cond::Ne,
        IntCC::SignedGreaterThanOrEqual => Cond::Ge,
        IntCC::SignedGreaterThan => Cond::Gt,
        IntCC::SignedLessThanOrEqual => Cond::Le,
        IntCC::SignedLessThan => Cond::Lt,
        IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
        IntCC::UnsignedGreaterThan => Cond::Hi,
        IntCC::UnsignedLessThanOrEqual => Cond::Ls,
        IntCC::UnsignedLessThan => Cond::Lo,
        IntCC::Overflow => Cond::Vs,
        IntCC::NotOverflow => Cond::Vc,
    }
}

pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
    // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
    // The FCMP instruction sets:
    //                   NZCV
    // - PSTATE.NZCV = 0011 on UN (unordered),
    //                 0110 on EQ,
    //                 1000 on LT,
    //                 0010 on GT.
    match cc {
        // EQ | LT | GT. Vc => V clear.
        FloatCC::Ordered => Cond::Vc,
        // UN. Vs => V set.
        FloatCC::Unordered => Cond::Vs,
        // EQ. Eq => Z set.
        FloatCC::Equal => Cond::Eq,
        // UN | LT | GT. Ne => Z clear.
        FloatCC::NotEqual => Cond::Ne,
        // LT | GT.
        FloatCC::OrderedNotEqual => unimplemented!(),
        // UN | EQ.
        FloatCC::UnorderedOrEqual => unimplemented!(),
        // LT. Mi => N set.
        FloatCC::LessThan => Cond::Mi,
        // LT | EQ. Ls => C clear or Z set.
        FloatCC::LessThanOrEqual => Cond::Ls,
        // GT. Gt => Z clear, N = V.
        FloatCC::GreaterThan => Cond::Gt,
        // GT | EQ. Ge => N = V.
        FloatCC::GreaterThanOrEqual => Cond::Ge,
        // UN | LT.
        FloatCC::UnorderedOrLessThan => unimplemented!(),
        // UN | LT | EQ.
        FloatCC::UnorderedOrLessThanOrEqual => unimplemented!(),
        // UN | GT.
        FloatCC::UnorderedOrGreaterThan => unimplemented!(),
        // UN | GT | EQ.
        FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(),
    }
}
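
// Worked example: `FloatCC::LessThanOrEqual` must hold exactly on LT | EQ.
// From the NZCV table above, LT sets 1000 (C clear) and EQ sets 0110 (Z set),
// while GT (0010) and UN (0011) both leave C set and Z clear; so the `Ls`
// condition ("C clear or Z set") selects precisely LT | EQ.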

/// Determines whether this condcode interprets inputs as signed or
/// unsigned. See the documentation for the `icmp` instruction in
/// cranelift-codegen/meta/src/shared/instructions.rs for further insights
/// into this.
pub fn condcode_is_signed(cc: IntCC) -> bool {
    match cc {
        IntCC::Equal => false,
        IntCC::NotEqual => false,
        IntCC::SignedGreaterThanOrEqual => true,
        IntCC::SignedGreaterThan => true,
        IntCC::SignedLessThanOrEqual => true,
        IntCC::SignedLessThan => true,
        IntCC::UnsignedGreaterThanOrEqual => false,
        IntCC::UnsignedGreaterThan => false,
        IntCC::UnsignedLessThanOrEqual => false,
        IntCC::UnsignedLessThan => false,
        IntCC::Overflow => true,
        IntCC::NotOverflow => true,
    }
}

//=============================================================================
// Helpers for instruction lowering.

/// Returns the size (in bits) of a given type.
pub fn ty_bits(ty: Type) -> usize {
    match ty {
        B1 => 1,
        B8 | I8 => 8,
        B16 | I16 => 16,
        B32 | I32 | F32 => 32,
        B64 | I64 | F64 => 64,
        B128 | I128 => 128,
        IFLAGS | FFLAGS => 32,
        I8X16 | B8X16 => 128,
        _ => panic!("ty_bits() on unknown type: {:?}", ty),
    }
}

pub(crate) fn ty_is_int(ty: Type) -> bool {
    match ty {
        B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
        F32 | F64 | B128 | I128 | I8X16 => false,
        IFLAGS | FFLAGS => panic!("Unexpected flags type"),
        _ => panic!("ty_is_int() on unknown type: {:?}", ty),
    }
}

pub(crate) fn ty_is_float(ty: Type) -> bool {
    !ty_is_int(ty)
}

pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
    let bits = ty_bits(ty);
    if bits <= 32 {
        op32
    } else if bits == 64 {
        op64
    } else {
        panic!("choose_32_64 on > 64 bits!")
    }
}

pub(crate) fn ldst_offset(data: &InstructionData) -> Option<i32> {
    match data {
        &InstructionData::Load { offset, .. }
        | &InstructionData::StackLoad { offset, .. }
        | &InstructionData::LoadComplex { offset, .. }
        | &InstructionData::Store { offset, .. }
        | &InstructionData::StackStore { offset, .. }
        | &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
        _ => None,
    }
}


pub(crate) fn inst_condcode(data: &InstructionData) -> Option<IntCC> {
    match data {
        &InstructionData::IntCond { cond, .. }
        | &InstructionData::BranchIcmp { cond, .. }
        | &InstructionData::IntCompare { cond, .. }
        | &InstructionData::IntCondTrap { cond, .. }
        | &InstructionData::BranchInt { cond, .. }
        | &InstructionData::IntSelect { cond, .. }
        | &InstructionData::IntCompareImm { cond, .. } => Some(cond),
        _ => None,
    }
}

pub(crate) fn inst_fp_condcode(data: &InstructionData) -> Option<FloatCC> {
    match data {
        &InstructionData::BranchFloat { cond, .. }
        | &InstructionData::FloatCompare { cond, .. }
        | &InstructionData::FloatCond { cond, .. }
        | &InstructionData::FloatCondTrap { cond, .. } => Some(cond),
        _ => None,
    }
}

pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
    match data {
        &InstructionData::Trap { code, .. }
        | &InstructionData::CondTrap { code, .. }
        | &InstructionData::IntCondTrap { code, .. }
        | &InstructionData::FloatCondTrap { code, .. } => Some(code),
        _ => None,
    }
}

/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input(input.insn, input.input);
    debug!(
        "maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
        input, inputs, op
    );
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        debug!(" -> input inst {:?}", data);
        if data.opcode() == op {
            return Some(src_inst);
        }
    }
    None
}


/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast).
///
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
/// a bit more generic.
pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
    conv: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input(input.insn, input.input);
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        if data.opcode() == conv {
            let inputs = c.get_input(src_inst, 0);
            if let Some((src_inst, _)) = inputs.inst {
                let data = c.data(src_inst);
                if data.opcode() == op {
                    return Some(src_inst);
                }
            }
        }
    }
    None
}
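
// Example (illustrative CLIF): with a boolean-to-int conversion between the
// compare and its user,
//
//     v1 = icmp eq v0, v9
//     v2 = bint.i32 v1
//     brnz v2, block1
//
// `maybe_input_insn_via_conv(ctx, input, Opcode::Icmp, Opcode::Bint)` looks
// through the `bint` and returns the `icmp`, so the branch lowering can use
// the flags-producing compare directly.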

pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    is_signed: bool,
) {
    debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let narrow_mode = match (bits <= 32, is_signed) {
        (true, true) => NarrowValueMode::SignExtend32,
        (true, false) => NarrowValueMode::ZeroExtend32,
        (false, true) => NarrowValueMode::SignExtend64,
        (false, false) => NarrowValueMode::ZeroExtend64,
    };
    let inputs = [
        InsnInput { insn, input: 0 },
        InsnInput { insn, input: 1 },
    ];
    let rn = input_to_reg(ctx, inputs[0], narrow_mode);
    let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
    debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
    let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
    let rd = writable_zero_reg();
    ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}

pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let inputs = [
        InsnInput { insn, input: 0 },
        InsnInput { insn, input: 1 },
    ];
    let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
    match bits {
        32 => {
            ctx.emit(Inst::FpuCmp32 { rn, rm });
        }
        64 => {
            ctx.emit(Inst::FpuCmp64 { rn, rm });
        }
        _ => panic!("Unknown float size"),
    }
}

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for AArch64Backend {
    type MInst = Inst;

    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
        lower_inst::lower_insn_to_regs(ctx, ir_inst)
    }

    fn lower_branch_group<C: LowerCtx<I = Inst>>(
        &self,
        ctx: &mut C,
        branches: &[IRInst],
        targets: &[MachLabel],
        fallthrough: Option<MachLabel>,
    ) -> CodegenResult<()> {
        lower_inst::lower_branch(ctx, branches, targets, fallthrough)
    }

    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(xreg(PINNED_REG))
    }
}