//! Lowering rules for AArch64.
//!
//! TODO: opportunities for better code generation:
//!
//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns. Recognize
//!   pre/post-index opportunities.
//!
//! - Floating-point immediates (FIMM instruction).

use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{Opcode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;

use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;

use super::lower_inst;

use crate::data_value::DataValue;
use log::{debug, trace};
use regalloc::{Reg, Writable};
use smallvec::SmallVec;

//============================================================================
// Result enum types.
//
// Lowering of a given value results in one of these enums, depending on the
// modes in which we can accept the value.

/// A lowering result: register, register-shift.  An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRS {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
}

/// A lowering result: register, register-shift, register-extend.  An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRSE {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    RegExtend(Reg, ExtendOp),
}

impl ResultRSE {
    fn from_rs(rs: ResultRS) -> ResultRSE {
        match rs {
            ResultRS::Reg(r) => ResultRSE::Reg(r),
            ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
        }
    }
}

/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSEImm12 {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    RegExtend(Reg, ExtendOp),
    Imm12(Imm12),
}

impl ResultRSEImm12 {
    fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
        match rse {
            ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
            ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
            ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
        }
    }
}

/// A lowering result: register, register-shift, or logical immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSImmLogic {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    ImmLogic(ImmLogic),
}

impl ResultRSImmLogic {
    fn from_rs(rse: ResultRS) -> ResultRSImmLogic {
        match rse {
            ResultRS::Reg(r) => ResultRSImmLogic::Reg(r),
            ResultRS::RegShift(r, s) => ResultRSImmLogic::RegShift(r, s),
        }
    }
}

/// A lowering result: register or immediate shift amount (arg to a shift op).
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRegImmShift {
    Reg(Reg),
    ImmShift(ImmShift),
}

//============================================================================
// Lowering: convert instruction inputs to forms that we can use.

/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
    let input = ctx.get_input_as_source_or_const(input.insn, input.input);
    input.constant
}

/// Lower an instruction input to a constant register-shift amount, if possible.
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<ShiftOpShiftImm> {
    input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
}

pub(crate) fn const_param_to_u128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    inst: IRInst,
) -> Option<u128> {
    match ctx.get_immediate(inst) {
        Some(DataValue::V128(bytes)) => Some(u128::from_le_bytes(bytes)),
        _ => None,
    }
}

/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `put_input_in_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NarrowValueMode {
    None,
    /// Zero-extend to 32 bits if original is < 32 bits.
    ZeroExtend32,
    /// Sign-extend to 32 bits if original is < 32 bits.
    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
    /// Sign-extend to 64 bits if original is < 64 bits.
    SignExtend64,
}

impl NarrowValueMode {
    fn is_32bit(&self) -> bool {
        match self {
            NarrowValueMode::None => false,
            NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
            NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
        }
    }
}

/// Lower an instruction input to a reg.
///
/// The value in the resulting register will be extended appropriately,
/// according to `narrow_mode` and the input's type: if an extension is
/// needed, the value is widened to the 32- or 64-bit width that
/// `narrow_mode` requests.
pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> Reg {
    debug!("put_input_in_reg: input {:?}", input);
    let ty = ctx.input_ty(input.insn, input.input);
    let from_bits = ty_bits(ty) as u8;
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    let in_reg = if let Some(c) = inputs.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        let masked = if from_bits < 64 {
            c & ((1u64 << from_bits) - 1)
        } else {
            c
        };
        let to_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
        for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ty, |ty| {
            ctx.alloc_tmp(ty).only_reg().unwrap()
        })
        .into_iter()
        {
            ctx.emit(inst);
        }
        to_reg.to_reg()
    } else {
        ctx.put_input_in_regs(input.insn, input.input)
            .only_reg()
            .unwrap()
    };

    match (narrow_mode, from_bits) {
        (NarrowValueMode::None, _) => in_reg,
        (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: false,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        (NarrowValueMode::SignExtend32, n) if n < 32 => {
            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,

        (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
            if inputs.constant.is_some() {
                // Constants are zero-extended to full 64-bit width on load already.
                in_reg
            } else {
                let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
                ctx.emit(Inst::Extend {
                    rd: tmp,
                    rn: in_reg,
                    signed: false,
                    from_bits,
                    to_bits: 64,
                });
                tmp.to_reg()
            }
        }
        (NarrowValueMode::SignExtend64, n) if n < 64 => {
            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 64,
            });
            tmp.to_reg()
        }
        (_, 64) => in_reg,
        (_, 128) => in_reg,

        _ => panic!(
            "Unsupported input width: input ty {} bits {} mode {:?}",
            ty, from_bits, narrow_mode
        ),
    }
}

/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
///
/// The `narrow_mode` flag indicates whether the consumer of this value needs
/// the high bits clear. For many operations, such as an add/sub/mul or any
/// bitwise logical operation, the low-bit results depend only on the low-bit
/// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
/// value is stored in the low 8 bits of the register and the high 24 bits are
/// undefined. If the op truly needs the high N bits clear (such as for a
/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
/// register will be provided the extended value.
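///
/// For example (illustrative, not exhaustive): an 8-bit `iadd` can take its
/// inputs with `NarrowValueMode::None`, because the low 8 bits of the sum
/// depend only on the low 8 bits of the operands; an 8-bit unsigned divide or
/// compare-to-zero, by contrast, would pass `NarrowValueMode::ZeroExtend32`
/// so that undefined high bits cannot affect the result.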
fn put_input_in_rs<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRS {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    if let Some((insn, 0)) = inputs.inst {
        let op = ctx.data(insn).opcode();

        if op == Opcode::Ishl {
            let shiftee = InsnInput { insn, input: 0 };
            let shift_amt = InsnInput { insn, input: 1 };

            // Can we get the shift amount as an immediate?
            if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
                let shiftee_bits = ty_bits(ctx.input_ty(insn, 0));
                if shiftee_bits <= std::u8::MAX as usize {
                    let shiftimm = shiftimm.mask(shiftee_bits as u8);
                    let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
                    return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
                }
            }
        }
    }

    ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
}

/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// See note on `put_input_in_rs` for a description of `narrow_mode`.
fn put_input_in_rse<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSE {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    if let Some((insn, 0)) = inputs.inst {
        let op = ctx.data(insn).opcode();
        let out_ty = ctx.output_ty(insn, 0);
        let out_bits = ty_bits(out_ty);

        // Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
        if op == Opcode::Uextend || op == Opcode::Sextend {
            let sign_extend = op == Opcode::Sextend;
            let inner_ty = ctx.input_ty(insn, 0);
            let inner_bits = ty_bits(inner_ty);
            assert!(inner_bits < out_bits);
            if match (sign_extend, narrow_mode) {
                // A single zero-extend or sign-extend is equal to itself.
                (_, NarrowValueMode::None) => true,
                // Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
                (false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => {
                    true
                }
                (true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => {
                    true
                }
                // A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend
                (false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
                    false
                }
                (true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => {
                    false
                }
            } {
                let extendop = match (sign_extend, inner_bits) {
                    (true, 8) => ExtendOp::SXTB,
                    (false, 8) => ExtendOp::UXTB,
                    (true, 16) => ExtendOp::SXTH,
                    (false, 16) => ExtendOp::UXTH,
                    (true, 32) => ExtendOp::SXTW,
                    (false, 32) => ExtendOp::UXTW,
                    _ => unreachable!(),
                };
                let reg =
                    put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
                return ResultRSE::RegExtend(reg, extendop);
            }
        }

        // If `out_ty` is narrower than the width that `narrow_mode` requests
        // (32 or 64 bits) and we need to zero- or sign-extend, then get the
        // result into a register and return an Extend-mode operand on that
        // register.
        if narrow_mode != NarrowValueMode::None
            && ((narrow_mode.is_32bit() && out_bits < 32)
                || (!narrow_mode.is_32bit() && out_bits < 64))
        {
            let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
            let extendop = match (narrow_mode, out_bits) {
                (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
                    ExtendOp::SXTH
                }
                (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
                    ExtendOp::UXTH
                }
                (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
                (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
                _ => unreachable!(),
            };
            return ResultRSE::RegExtend(reg, extendop);
        }
    }

    ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
}

pub(crate) fn put_input_in_rse_imm12<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSEImm12 {
    if let Some(imm_value) = input_to_const(ctx, input) {
        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
            return ResultRSEImm12::Imm12(i);
        }
    }

    ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
}

/// Like `put_input_in_rse_imm12` above, except that it is allowed to negate
/// the argument (assuming a two's-complement representation with the given
/// bit width) if this allows use of a 12-bit immediate. Used to flip `add`s
/// with negative immediates to `sub`s (and vice-versa).
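///
/// For example (illustrative): if the second operand of an `iadd` is the
/// constant -3, the value -3 itself does not fit in a 12-bit immediate, but
/// its negation 3 does; in that case the 12-bit immediate for 3 is returned
/// together with `true`, and the caller can emit a `sub` instead of an `add`.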
pub(crate) fn put_input_in_rse_imm12_maybe_negated<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    twos_complement_bits: usize,
    narrow_mode: NarrowValueMode,
) -> (ResultRSEImm12, bool) {
    assert!(twos_complement_bits <= 64);
    if let Some(imm_value) = input_to_const(ctx, input) {
        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
            return (ResultRSEImm12::Imm12(i), false);
        }
        let sign_extended =
            ((imm_value as i64) << (64 - twos_complement_bits)) >> (64 - twos_complement_bits);
        let inverted = sign_extended.wrapping_neg();
        if let Some(i) = Imm12::maybe_from_u64(inverted as u64) {
            return (ResultRSEImm12::Imm12(i), true);
        }
    }

    (
        ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode)),
        false,
    )
}

pub(crate) fn put_input_in_rs_immlogic<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSImmLogic {
    if let Some(imm_value) = input_to_const(ctx, input) {
        let ty = ctx.input_ty(input.insn, input.input);
        let ty = if ty_bits(ty) < 32 { I32 } else { ty };
        if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
            return ResultRSImmLogic::ImmLogic(i);
        }
    }

    ResultRSImmLogic::from_rs(put_input_in_rs(ctx, input, narrow_mode))
}

pub(crate) fn put_input_in_reg_immshift<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    shift_width_bits: usize,
) -> ResultRegImmShift {
    if let Some(imm_value) = input_to_const(ctx, input) {
        let imm_value = imm_value & ((shift_width_bits - 1) as u64);
        if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
            return ResultRegImmShift::ImmShift(immshift);
        }
    }

    ResultRegImmShift::Reg(put_input_in_reg(ctx, input, NarrowValueMode::None))
}

//============================================================================
// ALU instruction constructors.

pub(crate) fn alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst {
    match rm {
        ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
            alu_op: op,
            rd,
            rn,
            imm12,
        },
        ResultRSEImm12::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
        ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
            alu_op: op,
            rd,
            rn,
            rm,
            shiftop,
        },
        ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
            alu_op: op,
            rd,
            rn,
            rm,
            extendop,
        },
    }
}

pub(crate) fn alu_inst_immlogic(
    op: ALUOp,
    rd: Writable<Reg>,
    rn: Reg,
    rm: ResultRSImmLogic,
) -> Inst {
    match rm {
        ResultRSImmLogic::ImmLogic(imml) => Inst::AluRRImmLogic {
            alu_op: op,
            rd,
            rn,
            imml,
        },
        ResultRSImmLogic::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
        ResultRSImmLogic::RegShift(rm, shiftop) => Inst::AluRRRShift {
            alu_op: op,
            rd,
            rn,
            rm,
            shiftop,
        },
    }
}

pub(crate) fn alu_inst_immshift(
    op: ALUOp,
    rd: Writable<Reg>,
    rn: Reg,
    rm: ResultRegImmShift,
) -> Inst {
    match rm {
        ResultRegImmShift::ImmShift(immshift) => Inst::AluRRImmShift {
            alu_op: op,
            rd,
            rn,
            immshift,
        },
        ResultRegImmShift::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
    }
}

//============================================================================
// Lowering: addressing mode support. Takes instruction directly, rather
// than an `InsnInput`, to do more introspection.

/// 32-bit addends that make up an address: an input, and an extension mode on that
/// input.
type AddressAddend32List = SmallVec<[(Reg, ExtendOp); 4]>;
/// 64-bit addends that make up an address: just an input.
type AddressAddend64List = SmallVec<[Reg; 4]>;

/// Collect all addends that feed into an address computation, with extend-modes
/// on each.  Note that a load/store may have multiple address components (and
/// the CLIF semantics are that these components are added to form the final
/// address), but sometimes the CLIF that we receive still has arguments that
/// refer to `iadd` instructions. We also want to handle uextend/sextend below
/// the add(s).
///
/// We match any 64-bit add (and descend into its inputs), and we match any
/// 32-to-64-bit sign or zero extension. The returned addend-list will use
/// NarrowValueMode values to indicate how to extend each input:
///
/// - NarrowValueMode::None: the associated input is 64 bits wide; no extend.
/// - NarrowValueMode::SignExtend64: the associated input is 32 bits wide;
///                                  do a sign-extension.
/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
///                                  do a zero-extension.
///
/// We do not descend further into the inputs of extensions (unless it is a constant),
/// because supporting (e.g.) a 32-bit add that is later extended would require
/// additional masking of high-order bits, which is too complex. So, in essence, we
/// descend any number of adds from the roots, collecting all 64-bit address addends;
/// then possibly support extensions at these leaves.
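///
/// For example (illustrative, with hypothetical value names): an address
/// computed as `iadd(iadd(base_i64, uextend.i64(index_i32)), iconst 16)`
/// yields one 64-bit addend (`base_i64`), one 32-bit addend (`index_i32`
/// with a `UXTW` extend), and a constant offset of 16.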
fn collect_address_addends<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    roots: &[InsnInput],
) -> (AddressAddend64List, AddressAddend32List, i64) {
    let mut result32: AddressAddend32List = SmallVec::new();
    let mut result64: AddressAddend64List = SmallVec::new();
    let mut offset: i64 = 0;

    let mut workqueue: SmallVec<[InsnInput; 4]> = roots.iter().cloned().collect();

    while let Some(input) = workqueue.pop() {
        debug_assert!(ty_bits(ctx.input_ty(input.insn, input.input)) == 64);
        if let Some((op, insn)) = maybe_input_insn_multi(
            ctx,
            input,
            &[
                Opcode::Uextend,
                Opcode::Sextend,
                Opcode::Iadd,
                Opcode::Iconst,
            ],
        ) {
            match op {
                Opcode::Uextend | Opcode::Sextend if ty_bits(ctx.input_ty(insn, 0)) == 32 => {
                    let extendop = if op == Opcode::Uextend {
                        ExtendOp::UXTW
                    } else {
                        ExtendOp::SXTW
                    };
                    let extendee_input = InsnInput { insn, input: 0 };
                    // If the input is a zero-extension of a constant, add the value to the known
                    // offset.
                    // Only do this for zero-extension, as generating a sign-extended
                    // constant may be more instructions than using the 'SXTW' addressing mode.
                    if let (Some(insn), ExtendOp::UXTW) = (
                        maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
                        extendop,
                    ) {
                        let value = (ctx.get_constant(insn).unwrap() & 0xFFFF_FFFF_u64) as i64;
                        offset += value;
                    } else {
                        let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
                        result32.push((reg, extendop));
                    }
                }
                Opcode::Uextend | Opcode::Sextend => {
                    let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
                    result64.push(reg);
                }
                Opcode::Iadd => {
                    for input in 0..ctx.num_inputs(insn) {
                        let addend = InsnInput { insn, input };
                        workqueue.push(addend);
                    }
                }
                Opcode::Iconst => {
                    let value: i64 = ctx.get_constant(insn).unwrap() as i64;
                    offset += value;
                }
                _ => panic!("Unexpected opcode from maybe_input_insn_multi"),
            }
        } else {
            let reg = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend64);
            result64.push(reg);
        }
    }

    (result64, result32, offset)
}

/// Lower the address of a load or store.
pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    elem_ty: Type,
    roots: &[InsnInput],
    offset: i32,
) -> AMode {
    // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
    // mul instructions (Load/StoreComplex don't include scale factors).

    // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
    // extends and addition ops. We update these as we consume address
    // components, so they represent the remaining addends not yet handled.
    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
    let mut offset = args_offset + (offset as i64);

    trace!(
        "lower_address: addends64 {:?}, addends32 {:?}, offset {}",
        addends64,
        addends32,
        offset
    );

    // First, decide what the `AMode` will be. Take one extendee and one 64-bit
    // reg, or two 64-bit regs, or a 64-bit reg and a 32-bit reg with extension,
    // or some other combination as appropriate.
    let memarg = if addends64.len() > 0 {
        if addends32.len() > 0 {
            let (reg32, extendop) = addends32.pop().unwrap();
            let reg64 = addends64.pop().unwrap();
            AMode::RegExtended(reg64, reg32, extendop)
        } else if offset > 0 && offset < 0x1000 {
            let reg64 = addends64.pop().unwrap();
            let off = offset;
            offset = 0;
            AMode::RegOffset(reg64, off, elem_ty)
        } else if addends64.len() >= 2 {
            let reg1 = addends64.pop().unwrap();
            let reg2 = addends64.pop().unwrap();
            AMode::RegReg(reg1, reg2)
        } else {
            let reg1 = addends64.pop().unwrap();
            AMode::reg(reg1)
        }
    } else
    /* addends64.len() == 0 */
    {
        if addends32.len() > 0 {
            let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
            let (reg1, extendop) = addends32.pop().unwrap();
            let signed = match extendop {
                ExtendOp::SXTW => true,
                ExtendOp::UXTW => false,
                _ => unreachable!(),
            };
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: reg1,
                signed,
                from_bits: 32,
                to_bits: 64,
            });
            if let Some((reg2, extendop)) = addends32.pop() {
                AMode::RegExtended(tmp.to_reg(), reg2, extendop)
            } else {
                AMode::reg(tmp.to_reg())
            }
        } else
        /* addends32.len() == 0 */
        {
            let off_reg = ctx.alloc_tmp(I64).only_reg().unwrap();
            lower_constant_u64(ctx, off_reg, offset as u64);
            offset = 0;
            AMode::reg(off_reg.to_reg())
        }
    };

    // At this point, if we have any remaining components, we need to allocate a
    // temp, replace one of the registers in the AMode with the temp, and emit
    // instructions to add together the remaining components. Return immediately
    // if this is *not* the case.
    if offset == 0 && addends32.len() == 0 && addends64.len() == 0 {
        return memarg;
    }

    // Allocate the temp and shoehorn it into the AMode.
    let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
    let (reg, memarg) = match memarg {
        AMode::RegExtended(r1, r2, extendop) => {
            (r1, AMode::RegExtended(addr.to_reg(), r2, extendop))
        }
        AMode::RegOffset(r, off, ty) => (r, AMode::RegOffset(addr.to_reg(), off, ty)),
        AMode::RegReg(r1, r2) => (r2, AMode::RegReg(addr.to_reg(), r1)),
        AMode::UnsignedOffset(r, imm) => (r, AMode::UnsignedOffset(addr.to_reg(), imm)),
        _ => unreachable!(),
    };

    // If there is any offset, load that first into `addr`, and add the `reg`
    // that we kicked out of the `AMode`; otherwise, start with that reg.
    if offset != 0 {
        // If we can fit offset or -offset in an imm12, use an add-imm
        // to combine the reg and offset. Otherwise, load value first then add.
        if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) {
            ctx.emit(Inst::AluRRImm12 {
                alu_op: ALUOp::Add64,
                rd: addr,
                rn: reg,
                imm12,
            });
        } else if let Some(imm12) = Imm12::maybe_from_u64(offset.wrapping_neg() as u64) {
            ctx.emit(Inst::AluRRImm12 {
                alu_op: ALUOp::Sub64,
                rd: addr,
                rn: reg,
                imm12,
            });
        } else {
            lower_constant_u64(ctx, addr, offset as u64);
            ctx.emit(Inst::AluRRR {
                alu_op: ALUOp::Add64,
                rd: addr,
                rn: addr.to_reg(),
                rm: reg,
            });
        }
    } else {
        ctx.emit(Inst::gen_move(addr, reg, I64));
    }

    // Now handle reg64 and reg32-extended components.
    for reg in addends64 {
        // If the register is the stack reg, we must move it to another reg
        // before adding it.
        let reg = if reg == stack_reg() {
            let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
            ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
            tmp.to_reg()
        } else {
            reg
        };
        ctx.emit(Inst::AluRRR {
            alu_op: ALUOp::Add64,
            rd: addr,
            rn: addr.to_reg(),
            rm: reg,
        });
    }
    for (reg, extendop) in addends32 {
        assert!(reg != stack_reg());
        ctx.emit(Inst::AluRRRExtend {
            alu_op: ALUOp::Add64,
            rd: addr,
            rn: addr.to_reg(),
            rm: reg,
            extendop,
        });
    }

    memarg
}

pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u64,
) {
    for inst in Inst::load_constant(rd, value) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f32,
) {
    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

    for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f64,
) {
    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

    for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u128,
) {
    if value == 0 {
        // Fast-track a common case.  The general case, viz, calling `Inst::load_fp_constant128`,
        // is potentially expensive.
        ctx.emit(Inst::VecDupImm {
            rd,
            imm: ASIMDMovModImm::zero(ScalarSize::Size8),
            invert: false,
            size: VectorSize::Size8x16,
        });
    } else {
        let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
        for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
            ctx.emit(inst);
        }
    }
}

pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u64,
    size: VectorSize,
) {
    let (value, narrow_size) = match size.lane_size() {
        ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
        ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
        ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
        ScalarSize::Size64 => (value, ScalarSize::Size32),
        _ => unreachable!(),
    };
    let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) {
        Some((value, lane_size)) => (
            value,
            VectorSize::from_lane_size(lane_size, size.is_128bits()),
        ),
        None => (value, size),
    };
    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

    for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
    match cc {
        IntCC::Equal => Cond::Eq,
        IntCC::NotEqual => Cond::Ne,
        IntCC::SignedGreaterThanOrEqual => Cond::Ge,
        IntCC::SignedGreaterThan => Cond::Gt,
        IntCC::SignedLessThanOrEqual => Cond::Le,
        IntCC::SignedLessThan => Cond::Lt,
        IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
        IntCC::UnsignedGreaterThan => Cond::Hi,
        IntCC::UnsignedLessThanOrEqual => Cond::Ls,
        IntCC::UnsignedLessThan => Cond::Lo,
        IntCC::Overflow => Cond::Vs,
        IntCC::NotOverflow => Cond::Vc,
    }
}

pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
    // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
    // The FCMP instruction sets:
    //               NZCV
    // - PCSR.NZCV = 0011 on UN (unordered),
    //               0110 on EQ,
    //               1000 on LT,
    //               0010 on GT.
    match cc {
        // EQ | LT | GT. Vc => V clear.
        FloatCC::Ordered => Cond::Vc,
        // UN. Vs => V set.
        FloatCC::Unordered => Cond::Vs,
        // EQ. Eq => Z set.
        FloatCC::Equal => Cond::Eq,
        // UN | LT | GT. Ne => Z clear.
        FloatCC::NotEqual => Cond::Ne,
        // LT | GT.
        FloatCC::OrderedNotEqual => unimplemented!(),
        //  UN | EQ
        FloatCC::UnorderedOrEqual => unimplemented!(),
        // LT. Mi => N set.
        FloatCC::LessThan => Cond::Mi,
        // LT | EQ. Ls => C clear or Z set.
        FloatCC::LessThanOrEqual => Cond::Ls,
        // GT. Gt => Z clear, N = V.
        FloatCC::GreaterThan => Cond::Gt,
        // GT | EQ. Ge => N = V.
        FloatCC::GreaterThanOrEqual => Cond::Ge,
        // UN | LT
        FloatCC::UnorderedOrLessThan => unimplemented!(),
        // UN | LT | EQ
        FloatCC::UnorderedOrLessThanOrEqual => unimplemented!(),
        // UN | GT
        FloatCC::UnorderedOrGreaterThan => unimplemented!(),
        // UN | GT | EQ
        FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(),
    }
}

pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    mut rn: Reg,
    mut rm: Reg,
    ty: Type,
    cond: Cond,
) -> CodegenResult<()> {
    let is_float = match ty {
        F32X4 | F64X2 => true,
        _ => false,
    };
    let size = VectorSize::from_ty(ty);
    // 'Less than' operations are implemented by swapping
    // the order of operands and using the 'greater than'
    // instructions.
    // 'Not equal' is implemented with 'equal' and inverting
    // the result.
    let (alu_op, swap) = match (is_float, cond) {
        (false, Cond::Eq) => (VecALUOp::Cmeq, false),
        (false, Cond::Ne) => (VecALUOp::Cmeq, false),
        (false, Cond::Ge) => (VecALUOp::Cmge, false),
        (false, Cond::Gt) => (VecALUOp::Cmgt, false),
        (false, Cond::Le) => (VecALUOp::Cmge, true),
        (false, Cond::Lt) => (VecALUOp::Cmgt, true),
        (false, Cond::Hs) => (VecALUOp::Cmhs, false),
        (false, Cond::Hi) => (VecALUOp::Cmhi, false),
        (false, Cond::Ls) => (VecALUOp::Cmhs, true),
        (false, Cond::Lo) => (VecALUOp::Cmhi, true),
        (true, Cond::Eq) => (VecALUOp::Fcmeq, false),
        (true, Cond::Ne) => (VecALUOp::Fcmeq, false),
        (true, Cond::Mi) => (VecALUOp::Fcmgt, true),
        (true, Cond::Ls) => (VecALUOp::Fcmge, true),
        (true, Cond::Ge) => (VecALUOp::Fcmge, false),
        (true, Cond::Gt) => (VecALUOp::Fcmgt, false),
        _ => unreachable!(),
    };

    if swap {
        std::mem::swap(&mut rn, &mut rm);
    }

    ctx.emit(Inst::VecRRR {
        alu_op,
        rd,
        rn,
        rm,
        size,
    });

    if cond == Cond::Ne {
        ctx.emit(Inst::VecMisc {
            op: VecMisc2::Not,
            rd,
            rn: rd.to_reg(),
            size,
        });
    }

    Ok(())
}

/// Determines whether this condcode interprets inputs as signed or unsigned.  See the
/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs
/// for further insights into this.
pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
    match cc {
        IntCC::Equal
        | IntCC::UnsignedGreaterThanOrEqual
        | IntCC::UnsignedGreaterThan
        | IntCC::UnsignedLessThanOrEqual
        | IntCC::UnsignedLessThan
        | IntCC::NotEqual => false,
        IntCC::SignedGreaterThanOrEqual
        | IntCC::SignedGreaterThan
        | IntCC::SignedLessThanOrEqual
        | IntCC::SignedLessThan
        | IntCC::Overflow
        | IntCC::NotOverflow => true,
    }
}

//=============================================================================
// Helpers for instruction lowering.

pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
    let bits = ty_bits(ty);
    if bits <= 32 {
        op32
    } else if bits == 64 {
        op64
    } else {
        panic!("choose_32_64 on > 64 bits!")
    }
}

/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    debug!(
        "maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
        input, inputs, op
    );
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        debug!(" -> input inst {:?}", data);
        if data.opcode() == op {
            return Some(src_inst);
        }
    }
    None
}

/// Checks for an instance of any one of `ops` feeding the given input.
pub(crate) fn maybe_input_insn_multi<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    ops: &[Opcode],
) -> Option<(Opcode, IRInst)> {
    for &op in ops {
        if let Some(inst) = maybe_input_insn(c, input, op) {
            return Some((op, inst));
        }
    }
    None
}

/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast).
///
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
/// a bit more generic.
pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
    conv: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        if data.opcode() == conv {
            let inputs = c.get_input_as_source_or_const(src_inst, 0);
            if let Some((src_inst, _)) = inputs.inst {
                let data = c.data(src_inst);
                if data.opcode() == op {
                    return Some(src_inst);
                }
            }
        }
    }
    None
}

pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    is_signed: bool,
) {
    debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let narrow_mode = match (bits <= 32, is_signed) {
        (true, true) => NarrowValueMode::SignExtend32,
        (true, false) => NarrowValueMode::ZeroExtend32,
        (false, true) => NarrowValueMode::SignExtend64,
        (false, false) => NarrowValueMode::ZeroExtend64,
    };
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let ty = ctx.input_ty(insn, 0);
    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
    let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
    debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
    let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
    let rd = writable_zero_reg();
    ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}

pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
    match bits {
        32 => {
            ctx.emit(Inst::FpuCmp32 { rn, rm });
        }
        64 => {
            ctx.emit(Inst::FpuCmp64 { rn, rm });
        }
        _ => panic!("Unknown float size"),
    }
}

/// Materialize a boolean value into a register from the flags
/// (e.g. set by a comparison).
/// The result is 0 or -1 (all-ones), as expected for bool operations.
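///
/// For example (illustrative): a `b32` or wider result is materialized with
/// `csetm`, yielding an all-ones value when `cond` holds; a `b1` result uses
/// `cset`, yielding 1.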
pub(crate) fn materialize_bool_result<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    rd: Writable<Reg>,
    cond: Cond,
) {
    // A boolean is 0 / -1; if output width is > 1 use `csetm`,
    // otherwise use `cset`.
    if ty_bits(ctx.output_ty(insn, 0)) > 1 {
        ctx.emit(Inst::CSetm { rd, cond });
    } else {
        ctx.emit(Inst::CSet { rd, cond });
    }
}

/// This is target-word-size dependent.  And it excludes booleans and reftypes.
pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
    match ty {
        I8 | I16 | I32 | I64 => true,
        _ => false,
    }
}

fn load_op_to_ty(op: Opcode) -> Option<Type> {
    match op {
        Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => Some(I8),
        Opcode::Sload16 | Opcode::Uload16 | Opcode::Sload16Complex | Opcode::Uload16Complex => {
            Some(I16)
        }
        Opcode::Sload32 | Opcode::Uload32 | Opcode::Sload32Complex | Opcode::Uload32Complex => {
            Some(I32)
        }
        Opcode::Load | Opcode::LoadComplex => None,
        Opcode::Sload8x8 | Opcode::Uload8x8 | Opcode::Sload8x8Complex | Opcode::Uload8x8Complex => {
            Some(I8X8)
        }
        Opcode::Sload16x4
        | Opcode::Uload16x4
        | Opcode::Sload16x4Complex
        | Opcode::Uload16x4Complex => Some(I16X4),
        Opcode::Sload32x2
        | Opcode::Uload32x2
        | Opcode::Sload32x2Complex
        | Opcode::Uload32x2Complex => Some(I32X2),
        _ => None,
    }
}

/// Helper to lower a load instruction; this is used in several places, because
/// a load can sometimes be merged into another operation.
pub(crate) fn lower_load<C: LowerCtx<I = Inst>, F: FnMut(&mut C, Writable<Reg>, Type, AMode)>(
    ctx: &mut C,
    ir_inst: IRInst,
    inputs: &[InsnInput],
    output: InsnOutput,
    mut f: F,
) {
    let op = ctx.data(ir_inst).opcode();

    let elem_ty = load_op_to_ty(op).unwrap_or_else(|| ctx.output_ty(ir_inst, 0));

    let off = ctx.data(ir_inst).load_store_offset().unwrap();
    let mem = lower_address(ctx, elem_ty, &inputs[..], off);
    let rd = get_output_reg(ctx, output).only_reg().unwrap();

    f(ctx, rd, elem_ty, mem);
}

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for AArch64Backend {
    type MInst = Inst;

    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
        lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags)
    }

    fn lower_branch_group<C: LowerCtx<I = Inst>>(
        &self,
        ctx: &mut C,
        branches: &[IRInst],
        targets: &[MachLabel],
    ) -> CodegenResult<()> {
        lower_inst::lower_branch(ctx, branches, targets)
    }

    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(xreg(PINNED_REG))
    }
}