1 //! Lowering rules for AArch64.
2 //!
3 //! TODO: opportunities for better code generation:
4 //!
5 //! - Smarter use of addressing modes. Recognize a+SCALE*b patterns; recognize
6 //!   and incorporate sign/zero extension on indices. Recognize pre/post-index
7 //!   opportunities.
8 //!
9 //! - Floating-point immediates (FIMM instruction).
10 
11 use crate::ir::condcodes::{FloatCC, IntCC};
12 use crate::ir::types::*;
13 use crate::ir::Inst as IRInst;
14 use crate::ir::{InstructionData, Opcode, TrapCode, Type};
15 use crate::machinst::lower::*;
16 use crate::machinst::*;
17 
18 use crate::isa::aarch64::inst::*;
19 use crate::isa::aarch64::AArch64Backend;
20 
21 use super::lower_inst;
22 
23 use regalloc::{Reg, RegClass, Writable};
24 
25 //============================================================================
26 // Result enum types.
27 //
28 // Lowering of a given value results in one of these enums, depending on the
29 // modes in which we can accept the value.
30 
/// A lowering result: register, register-shift.  An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRS {
    /// The value lives in a plain register.
    Reg(Reg),
    /// The value is a register plus a shift operator and amount, to be applied
    /// by the consuming instruction's shifted-operand form.
    RegShift(Reg, ShiftOpAndAmt),
}
38 
/// A lowering result: register, register-shift, register-extend.  An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRSE {
    /// The value lives in a plain register.
    Reg(Reg),
    /// Register with a shift operator and amount applied by the consumer.
    RegShift(Reg, ShiftOpAndAmt),
    /// Register with a zero-/sign-extension applied by the consumer.
    RegExtend(Reg, ExtendOp),
}
47 
48 impl ResultRSE {
from_rs(rs: ResultRS) -> ResultRSE49     fn from_rs(rs: ResultRS) -> ResultRSE {
50         match rs {
51             ResultRS::Reg(r) => ResultRSE::Reg(r),
52             ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
53         }
54     }
55 }
56 
/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSEImm12 {
    /// The value lives in a plain register.
    Reg(Reg),
    /// Register with a shift operator and amount applied by the consumer.
    RegShift(Reg, ShiftOpAndAmt),
    /// Register with a zero-/sign-extension applied by the consumer.
    RegExtend(Reg, ExtendOp),
    /// A 12-bit (optionally shifted) immediate, usable by add/sub-style ops.
    Imm12(Imm12),
}
67 
68 impl ResultRSEImm12 {
from_rse(rse: ResultRSE) -> ResultRSEImm1269     fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
70         match rse {
71             ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
72             ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
73             ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
74         }
75     }
76 }
77 
/// A lowering result: register, register-shift, or logical immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSImmLogic {
    /// The value lives in a plain register.
    Reg(Reg),
    /// Register with a shift operator and amount applied by the consumer.
    RegShift(Reg, ShiftOpAndAmt),
    /// A logical (bitmask) immediate, usable by and/or/xor-style ops.
    ImmLogic(ImmLogic),
}
87 
88 impl ResultRSImmLogic {
from_rs(rse: ResultRS) -> ResultRSImmLogic89     fn from_rs(rse: ResultRS) -> ResultRSImmLogic {
90         match rse {
91             ResultRS::Reg(r) => ResultRSImmLogic::Reg(r),
92             ResultRS::RegShift(r, s) => ResultRSImmLogic::RegShift(r, s),
93         }
94     }
95 }
96 
/// A lowering result: register or immediate shift amount (arg to a shift op).
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRegImmShift {
    /// The shift amount lives in a register.
    Reg(Reg),
    /// The shift amount is a constant encodable directly in the shift op.
    ImmShift(ImmShift),
}
105 
106 //============================================================================
107 // Instruction input and output "slots".
108 //
109 // We use these types to refer to operand numbers, and result numbers, together
110 // with the associated instruction, in a type-safe way.
111 
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnOutput {
    // The instruction producing the value.
    pub(crate) insn: IRInst,
    // Zero-based index of this output among the instruction's results.
    pub(crate) output: usize,
}
118 
/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnInput {
    // The instruction consuming the value.
    pub(crate) insn: IRInst,
    // Zero-based index of this input among the instruction's operands.
    pub(crate) input: usize,
}
125 
/// Producer of a value: either a previous instruction's output, or a register that will be
/// codegen'd separately.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum InsnInputSource {
    /// The value comes from a specific output of another instruction.
    Output(InsnOutput),
    /// The value comes from a register produced by separately-generated code.
    Reg(Reg),
}
133 
134 impl InsnInputSource {
as_output(self) -> Option<InsnOutput>135     fn as_output(self) -> Option<InsnOutput> {
136         match self {
137             InsnInputSource::Output(o) => Some(o),
138             _ => None,
139         }
140     }
141 }
142 
get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput143 fn get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
144     assert!(num <= ctx.num_inputs(output.insn));
145     InsnInput {
146         insn: output.insn,
147         input: num,
148     }
149 }
150 
151 /// Convert an instruction input to a producing instruction's output if possible (in same BB), or a
152 /// register otherwise.
input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource153 fn input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
154     if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) {
155         let out = InsnOutput {
156             insn: input_inst,
157             output: result_num,
158         };
159         InsnInputSource::Output(out)
160     } else {
161         let reg = ctx.input(input.insn, input.input);
162         InsnInputSource::Reg(reg)
163     }
164 }
165 
166 //============================================================================
167 // Lowering: convert instruction outputs to result types.
168 
169 /// Lower an instruction output to a 64-bit constant, if possible.
output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64>170 pub(crate) fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
171     if out.output > 0 {
172         None
173     } else {
174         let inst_data = ctx.data(out.insn);
175         if inst_data.opcode() == Opcode::Null {
176             Some(0)
177         } else {
178             match inst_data {
179                 &InstructionData::UnaryImm { opcode: _, imm } => {
180                     // Only has Into for i64; we use u64 elsewhere, so we cast.
181                     let imm: i64 = imm.into();
182                     Some(imm as u64)
183                 }
184                 &InstructionData::UnaryBool { opcode: _, imm } => Some(u64::from(imm)),
185                 &InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())),
186                 &InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()),
187                 _ => None,
188             }
189         }
190     }
191 }
192 
output_to_const_f32<C: LowerCtx<I = Inst>>( ctx: &mut C, out: InsnOutput, ) -> Option<f32>193 pub(crate) fn output_to_const_f32<C: LowerCtx<I = Inst>>(
194     ctx: &mut C,
195     out: InsnOutput,
196 ) -> Option<f32> {
197     output_to_const(ctx, out).map(|value| f32::from_bits(value as u32))
198 }
199 
output_to_const_f64<C: LowerCtx<I = Inst>>( ctx: &mut C, out: InsnOutput, ) -> Option<f64>200 pub(crate) fn output_to_const_f64<C: LowerCtx<I = Inst>>(
201     ctx: &mut C,
202     out: InsnOutput,
203 ) -> Option<f64> {
204     output_to_const(ctx, out).map(|value| f64::from_bits(value))
205 }
206 
207 /// Lower an instruction output to a constant register-shift amount, if possible.
output_to_shiftimm<C: LowerCtx<I = Inst>>( ctx: &mut C, out: InsnOutput, ) -> Option<ShiftOpShiftImm>208 pub(crate) fn output_to_shiftimm<C: LowerCtx<I = Inst>>(
209     ctx: &mut C,
210     out: InsnOutput,
211 ) -> Option<ShiftOpShiftImm> {
212     output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift)
213 }
214 
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `input_to_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NarrowValueMode {
    /// Use the value as-is; bits above the type's width are left undefined.
    None,
    /// Zero-extend to 32 bits if original is < 32 bits.
    ZeroExtend32,
    /// Sign-extend to 32 bits if original is < 32 bits.
    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
    /// Sign-extend to 64 bits if original is < 64 bits.
    SignExtend64,
}
229 
230 impl NarrowValueMode {
is_32bit(&self) -> bool231     fn is_32bit(&self) -> bool {
232         match self {
233             NarrowValueMode::None => false,
234             NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
235             NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
236         }
237     }
238 }
239 
240 /// Lower an instruction output to a reg.
output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg>241 pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
242     ctx.output(out.insn, out.output)
243 }
244 
245 /// Lower an instruction input to a reg.
246 ///
247 /// The given register will be extended appropriately, according to
248 /// `narrow_mode` and the input's type. If extended, the value is
249 /// always extended to 64 bits, for simplicity.
input_to_reg<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> Reg250 pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
251     ctx: &mut C,
252     input: InsnInput,
253     narrow_mode: NarrowValueMode,
254 ) -> Reg {
255     let ty = ctx.input_ty(input.insn, input.input);
256     let from_bits = ty_bits(ty) as u8;
257     let in_reg = ctx.input(input.insn, input.input);
258     match (narrow_mode, from_bits) {
259         (NarrowValueMode::None, _) => in_reg,
260         (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
261             let tmp = ctx.tmp(RegClass::I64, I32);
262             ctx.emit(Inst::Extend {
263                 rd: tmp,
264                 rn: in_reg,
265                 signed: false,
266                 from_bits,
267                 to_bits: 32,
268             });
269             tmp.to_reg()
270         }
271         (NarrowValueMode::SignExtend32, n) if n < 32 => {
272             let tmp = ctx.tmp(RegClass::I64, I32);
273             ctx.emit(Inst::Extend {
274                 rd: tmp,
275                 rn: in_reg,
276                 signed: true,
277                 from_bits,
278                 to_bits: 32,
279             });
280             tmp.to_reg()
281         }
282         (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
283 
284         (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
285             let tmp = ctx.tmp(RegClass::I64, I32);
286             ctx.emit(Inst::Extend {
287                 rd: tmp,
288                 rn: in_reg,
289                 signed: false,
290                 from_bits,
291                 to_bits: 64,
292             });
293             tmp.to_reg()
294         }
295         (NarrowValueMode::SignExtend64, n) if n < 64 => {
296             let tmp = ctx.tmp(RegClass::I64, I32);
297             ctx.emit(Inst::Extend {
298                 rd: tmp,
299                 rn: in_reg,
300                 signed: true,
301                 from_bits,
302                 to_bits: 64,
303             });
304             tmp.to_reg()
305         }
306         (_, 64) => in_reg,
307 
308         _ => panic!(
309             "Unsupported input width: input ty {} bits {} mode {:?}",
310             ty, from_bits, narrow_mode
311         ),
312     }
313 }
314 
315 /// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
316 /// This does not actually codegen the source instruction; it just uses the
317 /// vreg into which the source instruction will generate its value.
318 ///
319 /// The `narrow_mode` flag indicates whether the consumer of this value needs
320 /// the high bits clear. For many operations, such as an add/sub/mul or any
321 /// bitwise logical operation, the low-bit results depend only on the low-bit
322 /// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
323 /// value is stored in the low 8 bits of the register and the high 24 bits are
324 /// undefined. If the op truly needs the high N bits clear (such as for a
325 /// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
326 /// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
327 /// register will be provided the extended value.
input_to_rs<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> ResultRS328 fn input_to_rs<C: LowerCtx<I = Inst>>(
329     ctx: &mut C,
330     input: InsnInput,
331     narrow_mode: NarrowValueMode,
332 ) -> ResultRS {
333     if let InsnInputSource::Output(out) = input_source(ctx, input) {
334         let insn = out.insn;
335         assert!(out.output <= ctx.num_outputs(insn));
336         let op = ctx.data(insn).opcode();
337 
338         if op == Opcode::Ishl {
339             let shiftee = get_input(ctx, out, 0);
340             let shift_amt = get_input(ctx, out, 1);
341 
342             // Can we get the shift amount as an immediate?
343             if let Some(shift_amt_out) = input_source(ctx, shift_amt).as_output() {
344                 if let Some(shiftimm) = output_to_shiftimm(ctx, shift_amt_out) {
345                     let reg = input_to_reg(ctx, shiftee, narrow_mode);
346                     ctx.merged(insn);
347                     ctx.merged(shift_amt_out.insn);
348                     return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
349                 }
350             }
351         }
352     }
353 
354     ResultRS::Reg(input_to_reg(ctx, input, narrow_mode))
355 }
356 
357 /// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
358 /// This does not actually codegen the source instruction; it just uses the
359 /// vreg into which the source instruction will generate its value.
360 ///
361 /// See note on `input_to_rs` for a description of `narrow_mode`.
input_to_rse<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> ResultRSE362 fn input_to_rse<C: LowerCtx<I = Inst>>(
363     ctx: &mut C,
364     input: InsnInput,
365     narrow_mode: NarrowValueMode,
366 ) -> ResultRSE {
367     if let InsnInputSource::Output(out) = input_source(ctx, input) {
368         let insn = out.insn;
369         assert!(out.output <= ctx.num_outputs(insn));
370         let op = ctx.data(insn).opcode();
371         let out_ty = ctx.output_ty(insn, out.output);
372         let out_bits = ty_bits(out_ty);
373 
374         // If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
375         // then get the result into a register and return an Extend-mode operand on
376         // that register.
377         if narrow_mode != NarrowValueMode::None
378             && ((narrow_mode.is_32bit() && out_bits < 32)
379                 || (!narrow_mode.is_32bit() && out_bits < 64))
380         {
381             let reg = output_to_reg(ctx, out);
382             let extendop = match (narrow_mode, out_bits) {
383                 (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
384                     ExtendOp::SXTB
385                 }
386                 (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
387                     ExtendOp::UXTB
388                 }
389                 (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
390                     ExtendOp::SXTB
391                 }
392                 (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
393                     ExtendOp::UXTB
394                 }
395                 (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
396                     ExtendOp::SXTH
397                 }
398                 (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
399                     ExtendOp::UXTH
400                 }
401                 (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
402                 (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
403                 _ => unreachable!(),
404             };
405             return ResultRSE::RegExtend(reg.to_reg(), extendop);
406         }
407 
408         // Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
409         if op == Opcode::Uextend || op == Opcode::Sextend {
410             assert!(out_bits == 32 || out_bits == 64);
411             let sign_extend = op == Opcode::Sextend;
412             let extendee = get_input(ctx, out, 0);
413             let inner_ty = ctx.input_ty(extendee.insn, extendee.input);
414             let inner_bits = ty_bits(inner_ty);
415             assert!(inner_bits < out_bits);
416             let extendop = match (sign_extend, inner_bits) {
417                 (true, 1) => ExtendOp::SXTB,
418                 (false, 1) => ExtendOp::UXTB,
419                 (true, 8) => ExtendOp::SXTB,
420                 (false, 8) => ExtendOp::UXTB,
421                 (true, 16) => ExtendOp::SXTH,
422                 (false, 16) => ExtendOp::UXTH,
423                 (true, 32) => ExtendOp::SXTW,
424                 (false, 32) => ExtendOp::UXTW,
425                 _ => unreachable!(),
426             };
427             let reg = input_to_reg(ctx, extendee, NarrowValueMode::None);
428             ctx.merged(insn);
429             return ResultRSE::RegExtend(reg, extendop);
430         }
431     }
432 
433     ResultRSE::from_rs(input_to_rs(ctx, input, narrow_mode))
434 }
435 
input_to_rse_imm12<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> ResultRSEImm12436 pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
437     ctx: &mut C,
438     input: InsnInput,
439     narrow_mode: NarrowValueMode,
440 ) -> ResultRSEImm12 {
441     if let InsnInputSource::Output(out) = input_source(ctx, input) {
442         if let Some(imm_value) = output_to_const(ctx, out) {
443             if let Some(i) = Imm12::maybe_from_u64(imm_value) {
444                 ctx.merged(out.insn);
445                 return ResultRSEImm12::Imm12(i);
446             }
447         }
448     }
449 
450     ResultRSEImm12::from_rse(input_to_rse(ctx, input, narrow_mode))
451 }
452 
input_to_rs_immlogic<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, ) -> ResultRSImmLogic453 pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
454     ctx: &mut C,
455     input: InsnInput,
456     narrow_mode: NarrowValueMode,
457 ) -> ResultRSImmLogic {
458     if let InsnInputSource::Output(out) = input_source(ctx, input) {
459         if let Some(imm_value) = output_to_const(ctx, out) {
460             let ty = ctx.output_ty(out.insn, out.output);
461             let ty = if ty_bits(ty) < 32 { I32 } else { ty };
462             if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
463                 ctx.merged(out.insn);
464                 return ResultRSImmLogic::ImmLogic(i);
465             }
466         }
467     }
468 
469     ResultRSImmLogic::from_rs(input_to_rs(ctx, input, narrow_mode))
470 }
471 
input_to_reg_immshift<C: LowerCtx<I = Inst>>( ctx: &mut C, input: InsnInput, ) -> ResultRegImmShift472 pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
473     ctx: &mut C,
474     input: InsnInput,
475 ) -> ResultRegImmShift {
476     if let InsnInputSource::Output(out) = input_source(ctx, input) {
477         if let Some(imm_value) = output_to_const(ctx, out) {
478             if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
479                 ctx.merged(out.insn);
480                 return ResultRegImmShift::ImmShift(immshift);
481             }
482         }
483     }
484 
485     ResultRegImmShift::Reg(input_to_reg(ctx, input, NarrowValueMode::None))
486 }
487 
488 //============================================================================
489 // ALU instruction constructors.
490 
alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst491 pub(crate) fn alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst {
492     match rm {
493         ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
494             alu_op: op,
495             rd,
496             rn,
497             imm12,
498         },
499         ResultRSEImm12::Reg(rm) => Inst::AluRRR {
500             alu_op: op,
501             rd,
502             rn,
503             rm,
504         },
505         ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
506             alu_op: op,
507             rd,
508             rn,
509             rm,
510             shiftop,
511         },
512         ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
513             alu_op: op,
514             rd,
515             rn,
516             rm,
517             extendop,
518         },
519     }
520 }
521 
alu_inst_immlogic( op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSImmLogic, ) -> Inst522 pub(crate) fn alu_inst_immlogic(
523     op: ALUOp,
524     rd: Writable<Reg>,
525     rn: Reg,
526     rm: ResultRSImmLogic,
527 ) -> Inst {
528     match rm {
529         ResultRSImmLogic::ImmLogic(imml) => Inst::AluRRImmLogic {
530             alu_op: op,
531             rd,
532             rn,
533             imml,
534         },
535         ResultRSImmLogic::Reg(rm) => Inst::AluRRR {
536             alu_op: op,
537             rd,
538             rn,
539             rm,
540         },
541         ResultRSImmLogic::RegShift(rm, shiftop) => Inst::AluRRRShift {
542             alu_op: op,
543             rd,
544             rn,
545             rm,
546             shiftop,
547         },
548     }
549 }
550 
alu_inst_immshift( op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRegImmShift, ) -> Inst551 pub(crate) fn alu_inst_immshift(
552     op: ALUOp,
553     rd: Writable<Reg>,
554     rn: Reg,
555     rm: ResultRegImmShift,
556 ) -> Inst {
557     match rm {
558         ResultRegImmShift::ImmShift(immshift) => Inst::AluRRImmShift {
559             alu_op: op,
560             rd,
561             rn,
562             immshift,
563         },
564         ResultRegImmShift::Reg(rm) => Inst::AluRRR {
565             alu_op: op,
566             rd,
567             rn,
568             rm,
569         },
570     }
571 }
572 
573 //============================================================================
574 // Lowering: addressing mode support. Takes instruction directly, rather
575 // than an `InsnInput`, to do more introspection.
576 
577 /// Lower the address of a load or store.
lower_address<C: LowerCtx<I = Inst>>( ctx: &mut C, elem_ty: Type, addends: &[InsnInput], offset: i32, ) -> MemArg578 pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
579     ctx: &mut C,
580     elem_ty: Type,
581     addends: &[InsnInput],
582     offset: i32,
583 ) -> MemArg {
584     // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
585     // mul instructions (Load/StoreComplex don't include scale factors).
586 
587     // Handle one reg and offset that fits in immediate, if possible.
588     if addends.len() == 1 {
589         let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
590         if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) {
591             return memarg;
592         }
593     }
594 
595     // Handle two regs and a zero offset, if possible.
596     if addends.len() == 2 && offset == 0 {
597         let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
598         let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
599         return MemArg::reg_plus_reg(ra, rb);
600     }
601 
602     // Otherwise, generate add instructions.
603     let addr = ctx.tmp(RegClass::I64, I64);
604 
605     // Get the const into a reg.
606     lower_constant_u64(ctx, addr.clone(), offset as u64);
607 
608     // Add each addend to the address.
609     for addend in addends {
610         let reg = input_to_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
611 
612         // In an addition, the stack register is the zero register, so divert it to another
613         // register just before doing the actual add.
614         let reg = if reg == stack_reg() {
615             let tmp = ctx.tmp(RegClass::I64, I64);
616             ctx.emit(Inst::Mov {
617                 rd: tmp,
618                 rm: stack_reg(),
619             });
620             tmp.to_reg()
621         } else {
622             reg
623         };
624 
625         ctx.emit(Inst::AluRRR {
626             alu_op: ALUOp::Add64,
627             rd: addr.clone(),
628             rn: addr.to_reg(),
629             rm: reg.clone(),
630         });
631     }
632 
633     MemArg::reg(addr.to_reg())
634 }
635 
lower_constant_u64<C: LowerCtx<I = Inst>>( ctx: &mut C, rd: Writable<Reg>, value: u64, )636 pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(
637     ctx: &mut C,
638     rd: Writable<Reg>,
639     value: u64,
640 ) {
641     for inst in Inst::load_constant(rd, value) {
642         ctx.emit(inst);
643     }
644 }
645 
lower_constant_f32<C: LowerCtx<I = Inst>>( ctx: &mut C, rd: Writable<Reg>, value: f32, )646 pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
647     ctx: &mut C,
648     rd: Writable<Reg>,
649     value: f32,
650 ) {
651     ctx.emit(Inst::load_fp_constant32(rd, value));
652 }
653 
lower_constant_f64<C: LowerCtx<I = Inst>>( ctx: &mut C, rd: Writable<Reg>, value: f64, )654 pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
655     ctx: &mut C,
656     rd: Writable<Reg>,
657     value: f64,
658 ) {
659     ctx.emit(Inst::load_fp_constant64(rd, value));
660 }
661 
lower_condcode(cc: IntCC) -> Cond662 pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
663     match cc {
664         IntCC::Equal => Cond::Eq,
665         IntCC::NotEqual => Cond::Ne,
666         IntCC::SignedGreaterThanOrEqual => Cond::Ge,
667         IntCC::SignedGreaterThan => Cond::Gt,
668         IntCC::SignedLessThanOrEqual => Cond::Le,
669         IntCC::SignedLessThan => Cond::Lt,
670         IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
671         IntCC::UnsignedGreaterThan => Cond::Hi,
672         IntCC::UnsignedLessThanOrEqual => Cond::Ls,
673         IntCC::UnsignedLessThan => Cond::Lo,
674         IntCC::Overflow => Cond::Vs,
675         IntCC::NotOverflow => Cond::Vc,
676     }
677 }
678 
lower_fp_condcode(cc: FloatCC) -> Cond679 pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
680     // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
681     // The FCMP instruction sets:
682     //               NZCV
683     // - PCSR.NZCV = 0011 on UN (unordered),
684     //               0110 on EQ,
685     //               1000 on LT,
686     //               0010 on GT.
687     match cc {
688         // EQ | LT | GT. Vc => V clear.
689         FloatCC::Ordered => Cond::Vc,
690         // UN. Vs => V set.
691         FloatCC::Unordered => Cond::Vs,
692         // EQ. Eq => Z set.
693         FloatCC::Equal => Cond::Eq,
694         // UN | LT | GT. Ne => Z clear.
695         FloatCC::NotEqual => Cond::Ne,
696         // LT | GT.
697         FloatCC::OrderedNotEqual => unimplemented!(),
698         //  UN | EQ
699         FloatCC::UnorderedOrEqual => unimplemented!(),
700         // LT. Mi => N set.
701         FloatCC::LessThan => Cond::Mi,
702         // LT | EQ. Ls => C clear or Z set.
703         FloatCC::LessThanOrEqual => Cond::Ls,
704         // GT. Gt => Z clear, N = V.
705         FloatCC::GreaterThan => Cond::Gt,
706         // GT | EQ. Ge => N = V.
707         FloatCC::GreaterThanOrEqual => Cond::Ge,
708         // UN | LT
709         FloatCC::UnorderedOrLessThan => unimplemented!(),
710         // UN | LT | EQ
711         FloatCC::UnorderedOrLessThanOrEqual => unimplemented!(),
712         // UN | GT
713         FloatCC::UnorderedOrGreaterThan => unimplemented!(),
714         // UN | GT | EQ
715         FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(),
716     }
717 }
718 
719 /// Determines whether this condcode interprets inputs as signed or
720 /// unsigned.  See the documentation for the `icmp` instruction in
721 /// cranelift-codegen/meta/src/shared/instructions.rs for further insights
722 /// into this.
condcode_is_signed(cc: IntCC) -> bool723 pub fn condcode_is_signed(cc: IntCC) -> bool {
724     match cc {
725         IntCC::Equal => false,
726         IntCC::NotEqual => false,
727         IntCC::SignedGreaterThanOrEqual => true,
728         IntCC::SignedGreaterThan => true,
729         IntCC::SignedLessThanOrEqual => true,
730         IntCC::SignedLessThan => true,
731         IntCC::UnsignedGreaterThanOrEqual => false,
732         IntCC::UnsignedGreaterThan => false,
733         IntCC::UnsignedLessThanOrEqual => false,
734         IntCC::UnsignedLessThan => false,
735         IntCC::Overflow => true,
736         IntCC::NotOverflow => true,
737     }
738 }
739 
740 //=============================================================================
741 // Helpers for instruction lowering.
742 
743 /// Returns the size (in bits) of a given type.
ty_bits(ty: Type) -> usize744 pub fn ty_bits(ty: Type) -> usize {
745     match ty {
746         B1 => 1,
747         B8 | I8 => 8,
748         B16 | I16 => 16,
749         B32 | I32 | F32 => 32,
750         B64 | I64 | F64 => 64,
751         B128 | I128 => 128,
752         IFLAGS | FFLAGS => 32,
753         _ => panic!("ty_bits() on unknown type: {:?}", ty),
754     }
755 }
756 
ty_is_int(ty: Type) -> bool757 pub(crate) fn ty_is_int(ty: Type) -> bool {
758     match ty {
759         B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
760         F32 | F64 | B128 | I128 => false,
761         IFLAGS | FFLAGS => panic!("Unexpected flags type"),
762         _ => panic!("ty_is_int() on unknown type: {:?}", ty),
763     }
764 }
765 
ty_is_float(ty: Type) -> bool766 pub(crate) fn ty_is_float(ty: Type) -> bool {
767     !ty_is_int(ty)
768 }
769 
choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T770 pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
771     let bits = ty_bits(ty);
772     if bits <= 32 {
773         op32
774     } else if bits == 64 {
775         op64
776     } else {
777         panic!("choose_32_64 on > 64 bits!")
778     }
779 }
780 
ldst_offset(data: &InstructionData) -> Option<i32>781 pub(crate) fn ldst_offset(data: &InstructionData) -> Option<i32> {
782     match data {
783         &InstructionData::Load { offset, .. }
784         | &InstructionData::StackLoad { offset, .. }
785         | &InstructionData::LoadComplex { offset, .. }
786         | &InstructionData::Store { offset, .. }
787         | &InstructionData::StackStore { offset, .. }
788         | &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
789         _ => None,
790     }
791 }
792 
inst_condcode(data: &InstructionData) -> Option<IntCC>793 pub(crate) fn inst_condcode(data: &InstructionData) -> Option<IntCC> {
794     match data {
795         &InstructionData::IntCond { cond, .. }
796         | &InstructionData::BranchIcmp { cond, .. }
797         | &InstructionData::IntCompare { cond, .. }
798         | &InstructionData::IntCondTrap { cond, .. }
799         | &InstructionData::BranchInt { cond, .. }
800         | &InstructionData::IntSelect { cond, .. }
801         | &InstructionData::IntCompareImm { cond, .. } => Some(cond),
802         _ => None,
803     }
804 }
805 
inst_fp_condcode(data: &InstructionData) -> Option<FloatCC>806 pub(crate) fn inst_fp_condcode(data: &InstructionData) -> Option<FloatCC> {
807     match data {
808         &InstructionData::BranchFloat { cond, .. }
809         | &InstructionData::FloatCompare { cond, .. }
810         | &InstructionData::FloatCond { cond, .. }
811         | &InstructionData::FloatCondTrap { cond, .. } => Some(cond),
812         _ => None,
813     }
814 }
815 
inst_trapcode(data: &InstructionData) -> Option<TrapCode>816 pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
817     match data {
818         &InstructionData::Trap { code, .. }
819         | &InstructionData::CondTrap { code, .. }
820         | &InstructionData::IntCondTrap { code, .. }
821         | &InstructionData::FloatCondTrap { code, .. } => Some(code),
822         _ => None,
823     }
824 }
825 
826 /// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so.
maybe_input_insn<C: LowerCtx<I = Inst>>( c: &mut C, input: InsnInput, op: Opcode, ) -> Option<IRInst>827 pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
828     c: &mut C,
829     input: InsnInput,
830     op: Opcode,
831 ) -> Option<IRInst> {
832     if let InsnInputSource::Output(out) = input_source(c, input) {
833         let data = c.data(out.insn);
834         if data.opcode() == op {
835             c.merged(out.insn);
836             return Some(out.insn);
837         }
838     }
839     None
840 }
841 
842 /// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
843 /// Bint or a bitcast). Marks one or both as merged if so, as appropriate.
844 ///
845 /// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
846 /// a bit more generic.
maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>( c: &mut C, input: InsnInput, op: Opcode, conv: Opcode, ) -> Option<IRInst>847 pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
848     c: &mut C,
849     input: InsnInput,
850     op: Opcode,
851     conv: Opcode,
852 ) -> Option<IRInst> {
853     if let Some(ret) = maybe_input_insn(c, input, op) {
854         return Some(ret);
855     }
856 
857     if let InsnInputSource::Output(out) = input_source(c, input) {
858         let data = c.data(out.insn);
859         if data.opcode() == conv {
860             let conv_insn = out.insn;
861             let conv_input = InsnInput {
862                 insn: conv_insn,
863                 input: 0,
864             };
865             if let Some(inner) = maybe_input_insn(c, conv_input, op) {
866                 c.merged(conv_insn);
867                 return Some(inner);
868             }
869         }
870     }
871     None
872 }
873 
lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>( ctx: &mut C, insn: IRInst, is_signed: bool, )874 pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
875     ctx: &mut C,
876     insn: IRInst,
877     is_signed: bool,
878 ) {
879     let ty = ctx.input_ty(insn, 0);
880     let bits = ty_bits(ty);
881     let narrow_mode = match (bits <= 32, is_signed) {
882         (true, true) => NarrowValueMode::SignExtend32,
883         (true, false) => NarrowValueMode::ZeroExtend32,
884         (false, true) => NarrowValueMode::SignExtend64,
885         (false, false) => NarrowValueMode::ZeroExtend64,
886     };
887     let inputs = [
888         InsnInput {
889             insn: insn,
890             input: 0,
891         },
892         InsnInput {
893             insn: insn,
894             input: 1,
895         },
896     ];
897     let ty = ctx.input_ty(insn, 0);
898     let rn = input_to_reg(ctx, inputs[0], narrow_mode);
899     let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
900     let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
901     let rd = writable_zero_reg();
902     ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
903 }
904 
lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst)905 pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
906     let ty = ctx.input_ty(insn, 0);
907     let bits = ty_bits(ty);
908     let inputs = [
909         InsnInput {
910             insn: insn,
911             input: 0,
912         },
913         InsnInput {
914             insn: insn,
915             input: 1,
916         },
917     ];
918     let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
919     let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
920     match bits {
921         32 => {
922             ctx.emit(Inst::FpuCmp32 { rn, rm });
923         }
924         64 => {
925             ctx.emit(Inst::FpuCmp64 { rn, rm });
926         }
927         _ => panic!("Unknown float size"),
928     }
929 }
930 
931 //=============================================================================
932 // Lowering-backend trait implementation.
933 
impl LowerBackend for AArch64Backend {
    type MInst = Inst;

    /// Lower a single IR instruction to machine instructions, delegating to
    /// the per-opcode dispatch in `lower_inst`.
    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
        lower_inst::lower_insn_to_regs(ctx, ir_inst);
    }

    /// Lower a group of branch instructions that terminate a block, given
    /// their target blocks and the optional fall-through block. Delegates to
    /// `lower_inst::lower_branch`.
    fn lower_branch_group<C: LowerCtx<I = Inst>>(
        &self,
        ctx: &mut C,
        branches: &[IRInst],
        targets: &[BlockIndex],
        fallthrough: Option<BlockIndex>,
    ) {
        lower_inst::lower_branch(ctx, branches, targets, fallthrough)
    }
}
951