//! Lowering rules for S390x.

use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::Inst as IRInst;
use crate::ir::{types, Endianness, InstructionData, MemFlags, Opcode, TrapCode, Type};
use crate::isa::s390x::abi::*;
use crate::isa::s390x::inst::*;
use crate::isa::s390x::S390xBackend;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::convert::TryFrom;
use regalloc::{Reg, Writable};
use smallvec::SmallVec;

//=============================================================================
// Helpers for instruction lowering.

fn ty_is_int(ty: Type) -> bool {
    match ty {
        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
        types::F32 | types::F64 => false,
        types::IFLAGS | types::FFLAGS => panic!("Unexpected flags type"),
        _ => panic!("ty_is_int() on unknown type: {:?}", ty),
    }
}

fn ty_is_float(ty: Type) -> bool {
    !ty_is_int(ty)
}

fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
    let bits = ty_bits(ty);
    if bits <= 32 {
        op32
    } else if bits == 64 {
        op64
    } else {
        panic!("choose_32_64 on > 64 bits!")
    }
}

//============================================================================
// Lowering: convert instruction inputs to forms that we can use.

/// Lower an instruction input to a 64-bit constant, if possible.
fn input_matches_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
    let input = ctx.get_input_as_source_or_const(input.insn, input.input);
    input.constant
}

/// Return false if the instruction input cannot have the value `imm`, true otherwise.
fn input_maybe_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput, imm: u64) -> bool {
    if let Some(c) = input_matches_const(ctx, input) {
        let ty = ctx.input_ty(input.insn, input.input);
        let from_bits = ty_bits(ty) as u8;
        let mask = if from_bits < 64 {
            (1u64 << ty_bits(ty)) - 1
        } else {
            0xffff_ffff_ffff_ffff
        };
        c & mask == imm & mask
    } else {
        true
    }
}

/// Lower an instruction input to a 16-bit signed constant, if possible.
fn input_matches_simm16<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<i16> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Ok(imm) = i16::try_from(imm_value as i64) {
            return Some(imm);
        }
    }
    None
}

/// Lower an instruction input to a 32-bit signed constant, if possible.
fn input_matches_simm32<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<i32> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Ok(imm) = i32::try_from(imm_value as i64) {
            return Some(imm);
        }
    }
    None
}

/// Lower an instruction input to a 32-bit unsigned constant, if possible.
fn input_matches_uimm32<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u32> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Ok(imm) = u32::try_from(imm_value) {
            return Some(imm);
        }
    }
    None
}

/// Lower a negated instruction input to a 16-bit signed constant, if possible.
fn negated_input_matches_simm16<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<i16> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Ok(imm) = i16::try_from(-(imm_value as i64)) {
            return Some(imm);
        }
    }
    None
}

/// Lower a negated instruction input to a 32-bit signed constant, if possible.
fn negated_input_matches_simm32<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<i32> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Ok(imm) = i32::try_from(-(imm_value as i64)) {
            return Some(imm);
        }
    }
    None
}

/// Lower an instruction input to a 16-bit shifted constant, if possible.
fn input_matches_uimm16shifted<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm16Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        return UImm16Shifted::maybe_from_u64(imm_value);
    }
    None
}

/// Lower an instruction input to a 32-bit shifted constant, if possible.
fn input_matches_uimm32shifted<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm32Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        return UImm32Shifted::maybe_from_u64(imm_value);
    }
    None
}

/// Lower an instruction input to a 16-bit inverted shifted constant, if possible.
fn input_matches_uimm16shifted_inv<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm16Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Some(imm) = UImm16Shifted::maybe_from_u64(!imm_value) {
            return Some(imm.negate_bits());
        }
    }
    None
}

/// Lower an instruction input to a 32-bit inverted shifted constant, if possible.
fn input_matches_uimm32shifted_inv<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm32Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Some(imm) = UImm32Shifted::maybe_from_u64(!imm_value) {
            return Some(imm.negate_bits());
        }
    }
    None
}

/// Checks for an instance of `op` feeding the given input.
fn input_matches_insn<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
    }
    None
}

/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast).
fn input_matches_insn_via_conv<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
    conv: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        if data.opcode() == conv {
            let inputs = c.get_input_as_source_or_const(src_inst, 0);
            if let Some((src_inst, _)) = inputs.inst {
                let data = c.data(src_inst);
                if data.opcode() == op {
                    return Some(src_inst);
                }
            }
        }
    }
    None
}

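/// Lower an instruction input to a memory operand, if the input is fed by a
/// load of the given opcode.  Only big-endian loads can serve as memory
/// operands of another instruction; the matched load is sunk into this use.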
fn input_matches_load_insn<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    op: Opcode,
) -> Option<MemArg> {
    if let Some(insn) = input_matches_insn(ctx, input, op) {
        let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
            .map(|i| InsnInput { insn, input: i })
            .collect();
        let off = ctx.data(insn).load_store_offset().unwrap();
        let flags = ctx.memflags(insn).unwrap();
        let endianness = flags.endianness(Endianness::Big);
        if endianness == Endianness::Big {
            let mem = lower_address(ctx, &inputs[..], off, flags);
            ctx.sink_inst(insn);
            return Some(mem);
        }
    }
    None
}

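/// Lower an instruction input to a 32- or 64-bit memory operand, if possible.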
fn input_matches_mem<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 {
        return input_matches_load_insn(ctx, input, Opcode::Load);
    }
    None
}

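/// Lower an instruction input to a memory operand holding a 16-bit value to
/// be sign-extended, if possible.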
fn input_matches_sext16_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 {
        return input_matches_load_insn(ctx, input, Opcode::Load);
    }
    if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 {
        return input_matches_load_insn(ctx, input, Opcode::Sload16);
    }
    None
}

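/// Lower an instruction input to a memory operand holding a 32-bit value to
/// be sign-extended to 64 bits, if possible.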
fn input_matches_sext32_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 {
        return input_matches_load_insn(ctx, input, Opcode::Sload32);
    }
    None
}

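/// Lower an instruction input to a register holding a 32-bit value to be
/// sign-extended to 64 bits, if the input is a 32-to-64-bit `sextend`.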
fn input_matches_sext32_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<Reg> {
    if let Some(insn) = input_matches_insn(ctx, input, Opcode::Sextend) {
        if ty_bits(ctx.input_ty(insn, 0)) == 32 {
            let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            return Some(reg);
        }
    }
    None
}

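/// Lower an instruction input to a register holding a 32-bit value to be
/// zero-extended to 64 bits, if the input is a 32-to-64-bit `uextend`.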
fn input_matches_uext32_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<Reg> {
    if let Some(insn) = input_matches_insn(ctx, input, Opcode::Uextend) {
        if ty_bits(ctx.input_ty(insn, 0)) == 32 {
            let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            return Some(reg);
        }
    }
    None
}

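/// Lower an instruction input to a memory operand holding a 16-bit value to
/// be zero-extended, if possible.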
fn input_matches_uext16_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 {
        return input_matches_load_insn(ctx, input, Opcode::Load);
    }
    if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 {
        return input_matches_load_insn(ctx, input, Opcode::Uload16);
    }
    None
}

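/// Lower an instruction input to a memory operand holding a 32-bit value to
/// be zero-extended to 64 bits, if possible.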
fn input_matches_uext32_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 {
        return input_matches_load_insn(ctx, input, Opcode::Uload32);
    }
    None
}

//============================================================================
// Lowering: force instruction input into a register

/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `put_input_in_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum NarrowValueMode {
    None,
    /// Zero-extend to 32 bits if original is < 32 bits.
    ZeroExtend32,
    /// Sign-extend to 32 bits if original is < 32 bits.
    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
    /// Sign-extend to 64 bits if original is < 64 bits.
    SignExtend64,
}

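/// Emit an extending load of a `from_ty` value from `mem`, extended (signed
/// or unsigned) to `to_ty`, and return the destination register.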
fn extend_memory_to_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    mem: MemArg,
    from_ty: Type,
    to_ty: Type,
    signed: bool,
) -> Reg {
    let rd = ctx.alloc_tmp(to_ty).only_reg().unwrap();
    ctx.emit(match (signed, ty_bits(to_ty), ty_bits(from_ty)) {
        (false, 32, 8) => Inst::Load32ZExt8 { rd, mem },
        (false, 32, 16) => Inst::Load32ZExt16 { rd, mem },
        (true, 32, 8) => Inst::Load32SExt8 { rd, mem },
        (true, 32, 16) => Inst::Load32SExt16 { rd, mem },
        (false, 64, 8) => Inst::Load64ZExt8 { rd, mem },
        (false, 64, 16) => Inst::Load64ZExt16 { rd, mem },
        (false, 64, 32) => Inst::Load64ZExt32 { rd, mem },
        (true, 64, 8) => Inst::Load64SExt8 { rd, mem },
        (true, 64, 16) => Inst::Load64SExt16 { rd, mem },
        (true, 64, 32) => Inst::Load64SExt32 { rd, mem },
        _ => panic!("Unsupported size in load"),
    });
    rd.to_reg()
}

/// Sign-extend the low `from_bits` bits of `value` to a full u64.
fn sign_extend_to_u64(value: u64, from_bits: u8) -> u64 {
    assert!(from_bits <= 64);
    if from_bits >= 64 {
        value
    } else {
        (((value << (64 - from_bits)) as i64) >> (64 - from_bits)) as u64
    }
}

/// Zero-extend the low `from_bits` bits of `value` to a full u64.
fn zero_extend_to_u64(value: u64, from_bits: u8) -> u64 {
    assert!(from_bits <= 64);
    if from_bits >= 64 {
        value
    } else {
        value & ((1u64 << from_bits) - 1)
    }
}

/// Lower an instruction input to a reg.
///
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type.
fn put_input_in_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> Reg {
    let signed = match narrow_mode {
        NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true,
        NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false,
        _ => false,
    };
    let ty = ctx.input_ty(input.insn, input.input);
    let from_bits = ty_bits(ty) as u8;
    let ext_ty = match narrow_mode {
        NarrowValueMode::None => ty,
        NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => types::I32,
        NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => types::I64,
    };
    let to_bits = ty_bits(ext_ty) as u8;
    assert!(to_bits >= from_bits);

    if let Some(c) = input_matches_const(ctx, input) {
        let extended = if from_bits == to_bits {
            c
        } else if signed {
            sign_extend_to_u64(c, from_bits)
        } else {
            zero_extend_to_u64(c, from_bits)
        };
        let masked = zero_extend_to_u64(extended, to_bits);

        // Generate constants fresh at each use to minimize long-range register pressure.
        let to_reg = ctx.alloc_tmp(ext_ty).only_reg().unwrap();
        for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ext_ty, |ty| {
            ctx.alloc_tmp(ty).only_reg().unwrap()
        })
        .into_iter()
        {
            ctx.emit(inst);
        }
        to_reg.to_reg()
    } else if to_bits == from_bits {
        ctx.put_input_in_regs(input.insn, input.input)
            .only_reg()
            .unwrap()
    } else if let Some(mem) = input_matches_load_insn(ctx, input, Opcode::Load) {
        extend_memory_to_reg(ctx, mem, ty, ext_ty, signed)
    } else {
        let rd = ctx.alloc_tmp(ext_ty).only_reg().unwrap();
        let rn = ctx
            .put_input_in_regs(input.insn, input.input)
            .only_reg()
            .unwrap();
        ctx.emit(Inst::Extend {
            rd,
            rn,
            signed,
            from_bits,
            to_bits,
        });
        rd.to_reg()
    }
}

//============================================================================
// Lowering: addressing mode support. Takes instruction directly, rather
// than an `InsnInput`, to do more introspection.

/// Lower the address of a load or store.
fn lower_address<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    addends: &[InsnInput],
    offset: i32,
    flags: MemFlags,
) -> MemArg {
    // Handle one reg and offset.
    if addends.len() == 1 {
        if offset == 0 {
            if let Some(add) = input_matches_insn(ctx, addends[0], Opcode::Iadd) {
                debug_assert_eq!(ctx.output_ty(add, 0), types::I64);
                let add_inputs = &[
                    InsnInput {
                        insn: add,
                        input: 0,
                    },
                    InsnInput {
                        insn: add,
                        input: 1,
                    },
                ];

                let ra = put_input_in_reg(ctx, add_inputs[0], NarrowValueMode::None);
                let rb = put_input_in_reg(ctx, add_inputs[1], NarrowValueMode::None);
                return MemArg::reg_plus_reg(ra, rb, flags);
            }
        }

        if let Some(symbol) = input_matches_insn(ctx, addends[0], Opcode::SymbolValue) {
            let (extname, dist, ext_offset) = ctx.symbol_value(symbol).unwrap();
            let ext_offset = ext_offset + i64::from(offset);
            if dist == RelocDistance::Near && (ext_offset & 1) == 0 {
                if let Ok(offset) = i32::try_from(ext_offset) {
                    return MemArg::Symbol {
                        name: Box::new(extname.clone()),
                        offset,
                        flags,
                    };
                }
            }
        }

        let reg = put_input_in_reg(ctx, addends[0], NarrowValueMode::None);
        return MemArg::reg_plus_off(reg, offset as i64, flags);
    }

    // Handle two regs and a zero offset.
    if addends.len() == 2 && offset == 0 {
        let ra = put_input_in_reg(ctx, addends[0], NarrowValueMode::None);
        let rb = put_input_in_reg(ctx, addends[1], NarrowValueMode::None);
        return MemArg::reg_plus_reg(ra, rb, flags);
    }

    // Otherwise, generate add instructions.
    let addr = ctx.alloc_tmp(types::I64).only_reg().unwrap();

    // Get the const into a reg.
    lower_constant_u64(ctx, addr.clone(), offset as u64);

    // Add each addend to the address.
    for addend in addends {
        let reg = put_input_in_reg(ctx, *addend, NarrowValueMode::None);

        ctx.emit(Inst::AluRRR {
            alu_op: ALUOp::Add64,
            rd: addr.clone(),
            rn: addr.to_reg(),
            rm: reg.clone(),
        });
    }

    MemArg::reg(addr.to_reg(), flags)
}

//============================================================================
// Lowering: generating constants.

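/// Load a 64-bit integer constant into a register.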
fn lower_constant_u64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u64) {
    for inst in Inst::load_constant64(rd, value) {
        ctx.emit(inst);
    }
}

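/// Load a 32-bit integer constant into a register.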
fn lower_constant_u32<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u32) {
    for inst in Inst::load_constant32(rd, value) {
        ctx.emit(inst);
    }
}

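/// Load a 32-bit floating-point constant into a register.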
fn lower_constant_f32<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f32) {
    ctx.emit(Inst::load_fp_constant32(rd, value));
}

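/// Load a 64-bit floating-point constant into a register.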
fn lower_constant_f64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f64) {
    ctx.emit(Inst::load_fp_constant64(rd, value));
}

//=============================================================================
// Lowering: comparisons

/// Determines whether this condcode interprets inputs as signed or
/// unsigned.  See the documentation for the `icmp` instruction in
/// cranelift-codegen/meta/src/shared/instructions.rs for further insights
/// into this.
pub fn condcode_is_signed(cc: IntCC) -> bool {
    match cc {
        IntCC::Equal => false,
        IntCC::NotEqual => false,
        IntCC::SignedGreaterThanOrEqual => true,
        IntCC::SignedGreaterThan => true,
        IntCC::SignedLessThanOrEqual => true,
        IntCC::SignedLessThan => true,
        IntCC::UnsignedGreaterThanOrEqual => false,
        IntCC::UnsignedGreaterThan => false,
        IntCC::UnsignedLessThanOrEqual => false,
        IntCC::UnsignedLessThan => false,
        IntCC::Overflow => true,
        IntCC::NotOverflow => true,
    }
}

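/// Lower an integer comparison feeding `insn` to a flags-setting compare,
/// using immediate, memory, or extended-register operand forms where the
/// inputs allow.  Memory operands are only used if `may_sink_memory` is true.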
fn lower_icmp_to_flags<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    is_signed: bool,
    may_sink_memory: bool,
) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let narrow_mode = match (bits <= 32, is_signed) {
        (true, true) => NarrowValueMode::SignExtend32,
        (true, false) => NarrowValueMode::ZeroExtend32,
        (false, true) => NarrowValueMode::SignExtend64,
        (false, false) => NarrowValueMode::ZeroExtend64,
    };
    let inputs = [
        InsnInput { insn, input: 0 },
        InsnInput { insn, input: 1 },
    ];
    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
    if is_signed {
        let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64);
        // Try matching immediate operand.
        if let Some(imm) = input_matches_simm16(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRSImm16 { op, rn, imm });
        }
        if let Some(imm) = input_matches_simm32(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRSImm32 { op, rn, imm });
        }
        // If sinking memory loads is allowed, try matching memory operand.
        if may_sink_memory {
            if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX { op, rn, mem });
            }
            if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) {
                let op = choose_32_64(ty, CmpOp::CmpS32Ext16, CmpOp::CmpS64Ext16);
                return ctx.emit(Inst::CmpRX { op, rn, mem });
            }
            if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX {
                    op: CmpOp::CmpS64Ext32,
                    rn,
                    mem,
                });
            }
        }
        // Try matching sign-extension in register.
        if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRR {
                op: CmpOp::CmpS64Ext32,
                rn,
                rm,
            });
        }
        // If no special case matched above, fall back to a register compare.
        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
        return ctx.emit(Inst::CmpRR { op, rn, rm });
    } else {
        let op = choose_32_64(ty, CmpOp::CmpL32, CmpOp::CmpL64);
        // Try matching immediate operand.
        if let Some(imm) = input_matches_uimm32(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRUImm32 { op, rn, imm });
        }
        // If sinking memory loads is allowed, try matching memory operand.
        if may_sink_memory {
            if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX { op, rn, mem });
            }
            if let Some(mem) = input_matches_uext16_mem(ctx, inputs[1]) {
                match &mem {
                    &MemArg::Symbol { .. } => {
                        let op = choose_32_64(ty, CmpOp::CmpL32Ext16, CmpOp::CmpL64Ext16);
                        return ctx.emit(Inst::CmpRX { op, rn, mem });
                    }
                    _ => {
                        let reg_ty = choose_32_64(ty, types::I32, types::I64);
                        let rm = extend_memory_to_reg(ctx, mem, ty, reg_ty, false);
                        return ctx.emit(Inst::CmpRR { op, rn, rm });
                    }
                }
            }
            if let Some(mem) = input_matches_uext32_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX {
                    op: CmpOp::CmpL64Ext32,
                    rn,
                    mem,
                });
            }
        }
        // Try matching zero-extension in register.
        if let Some(rm) = input_matches_uext32_reg(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRR {
                op: CmpOp::CmpL64Ext32,
                rn,
                rm,
            });
        }
        // If no special case matched above, fall back to a register compare.
        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
        return ctx.emit(Inst::CmpRR { op, rn, rm });
    }
}

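/// Lower a floating-point comparison feeding `insn` to a flags-setting
/// compare.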
fn lower_fcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let inputs = [
        InsnInput { insn, input: 0 },
        InsnInput { insn, input: 1 },
    ];
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
    match bits {
        32 => {
            ctx.emit(Inst::FpuCmp32 { rn, rm });
        }
        64 => {
            ctx.emit(Inst::FpuCmp64 { rn, rm });
        }
        _ => panic!("Unknown float size"),
    }
}

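/// Lower a boolean input to a flags-setting instruction, and return the
/// condition under which the flags denote "true".  A comparison feeding the
/// input (possibly via `bint`) is lowered directly; any other boolean value
/// is compared against zero.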
fn lower_boolean_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Cond {
    if let Some(icmp_insn) = input_matches_insn_via_conv(ctx, input, Opcode::Icmp, Opcode::Bint) {
        // FIXME: If the Icmp (and Bint) only have a single use, we can still allow sinking memory
        let may_sink_memory = false;
        let condcode = ctx.data(icmp_insn).cond_code().unwrap();
        let is_signed = condcode_is_signed(condcode);
        lower_icmp_to_flags(ctx, icmp_insn, is_signed, may_sink_memory);
        Cond::from_intcc(condcode)
    } else if let Some(fcmp_insn) =
        input_matches_insn_via_conv(ctx, input, Opcode::Fcmp, Opcode::Bint)
    {
        let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
        lower_fcmp_to_flags(ctx, fcmp_insn);
        Cond::from_floatcc(condcode)
    } else {
        let ty = ctx.input_ty(input.insn, input.input);
        let narrow_mode = if ty.bits() < 32 {
            NarrowValueMode::ZeroExtend32
        } else {
            NarrowValueMode::None
        };
        let rn = put_input_in_reg(ctx, input, narrow_mode);
        let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64);
        ctx.emit(Inst::CmpRSImm16 { op, rn, imm: 0 });
        Cond::from_intcc(IntCC::NotEqual)
    }
}

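/// Materialize flag condition `cond` as a boolean result of type `ty` in
/// `rd`: 1 for a true `b1`, all ones for a true wider boolean, 0 otherwise.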
fn lower_flags_to_bool_result<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    cond: Cond,
    rd: Writable<Reg>,
    ty: Type,
) {
    if ty_bits(ty) == 1 {
        lower_constant_u32(ctx, rd, 0);
        ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 1 });
    } else if ty_bits(ty) < 64 {
        lower_constant_u32(ctx, rd, 0);
        ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: -1 });
    } else {
        lower_constant_u64(ctx, rd, 0);
        ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: -1 });
    }
}

//============================================================================
// Lowering: main entry point for lowering an instruction

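/// Actually codegen an instruction's results into registers.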
fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    flags: &Flags,
) -> CodegenResult<()> {
    let op = ctx.data(insn).opcode();
    let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
        .map(|i| InsnInput { insn, input: i })
        .collect();
    let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
        .map(|i| InsnOutput { insn, output: i })
        .collect();
    let ty = if outputs.len() > 0 {
        Some(ctx.output_ty(insn, 0))
    } else {
        None
    };

    match op {
        Opcode::Nop => {
            // Nothing.
        }

        Opcode::Copy | Opcode::Ireduce | Opcode::Breduce => {
            // Smaller ints / bools have the high bits undefined, so any reduce
            // operation is simply a copy.
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let ty = ctx.input_ty(insn, 0);
            ctx.emit(Inst::gen_move(rd, rn, ty));
        }

        Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
            let value = ctx.get_constant(insn).unwrap();
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let ty = ty.unwrap();
            if ty.bits() <= 32 {
                lower_constant_u32(ctx, rd, value as u32);
            } else {
                lower_constant_u64(ctx, rd, value);
            }
        }
        Opcode::F32const => {
            let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            lower_constant_f32(ctx, rd, value);
        }
        Opcode::F64const => {
            let value = f64::from_bits(ctx.get_constant(insn).unwrap());
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            lower_constant_f64(ctx, rd, value);
        }

        Opcode::Iadd => {
            let ty = ty.unwrap();
            let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            if let Some(imm) = input_matches_simm16(ctx, inputs[1]) {
                ctx.emit(Inst::AluRRSImm16 {
                    alu_op,
                    rd,
                    rn,
                    imm,
                });
            } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm });
            } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX { alu_op, rd, mem });
            } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) {
                let alu_op = choose_32_64(ty, ALUOp::Add32Ext16, ALUOp::Add64Ext16);
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX { alu_op, rd, mem });
            } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX {
                    alu_op: ALUOp::Add64Ext32,
                    rd,
                    mem,
                });
            } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRR {
                    alu_op: ALUOp::Add64Ext32,
                    rd,
                    rm,
                });
            } else {
                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
            }
        }
        Opcode::Isub => {
            let ty = ty.unwrap();
            let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
            let neg_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            if let Some(imm) = negated_input_matches_simm16(ctx, inputs[1]) {
                ctx.emit(Inst::AluRRSImm16 {
                    alu_op: neg_op,
                    rd,
                    rn,
                    imm,
                });
            } else if let Some(imm) = negated_input_matches_simm32(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRSImm32 {
                    alu_op: neg_op,
                    rd,
                    imm,
                });
            } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX { alu_op, rd, mem });
            } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) {
                let alu_op = choose_32_64(ty, ALUOp::Sub32Ext16, ALUOp::Sub64Ext16);
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX { alu_op, rd, mem });
            } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX {
                    alu_op: ALUOp::Sub64Ext32,
                    rd,
                    mem,
                });
            } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRR {
                    alu_op: ALUOp::Sub64Ext32,
                    rd,
                    rm,
                });
            } else {
                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
            }
        }

        Opcode::UaddSat | Opcode::SaddSat => unimplemented!(),
        Opcode::UsubSat | Opcode::SsubSat => unimplemented!(),

        Opcode::Iabs => {
            let ty = ty.unwrap();
            let op = choose_32_64(ty, UnaryOp::Abs32, UnaryOp::Abs64);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) {
                ctx.emit(Inst::UnaryRR {
                    op: UnaryOp::Abs64Ext32,
                    rd,
                    rn,
                });
            } else {
                let narrow_mode = if ty.bits() < 32 {
                    NarrowValueMode::SignExtend32
                } else {
                    NarrowValueMode::None
                };
                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
                ctx.emit(Inst::UnaryRR { op, rd, rn });
            }
        }
        Opcode::Ineg => {
            let ty = ty.unwrap();
            let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) {
                ctx.emit(Inst::UnaryRR {
                    op: UnaryOp::Neg64Ext32,
                    rd,
                    rn,
                });
            } else {
                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                ctx.emit(Inst::UnaryRR { op, rd, rn });
            }
        }

        Opcode::Imul => {
            let ty = ty.unwrap();
            let alu_op = choose_32_64(ty, ALUOp::Mul32, ALUOp::Mul64);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            if let Some(imm) = input_matches_simm16(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRSImm16 { alu_op, rd, imm });
            } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm });
            } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX { alu_op, rd, mem });
            } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) {
                let alu_op = choose_32_64(ty, ALUOp::Mul32Ext16, ALUOp::Mul64Ext16);
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX { alu_op, rd, mem });
            } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRX {
                    alu_op: ALUOp::Mul64Ext32,
                    rd,
                    mem,
                });
            } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) {
                ctx.emit(Inst::gen_move(rd, rn, ty));
                ctx.emit(Inst::AluRR {
                    alu_op: ALUOp::Mul64Ext32,
                    rd,
                    rm,
                });
            } else {
                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
            }
        }

        Opcode::Umulhi | Opcode::Smulhi => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let is_signed = op == Opcode::Smulhi;
            let input_ty = ctx.input_ty(insn, 0);
            assert!(ctx.input_ty(insn, 1) == input_ty);
            assert!(ctx.output_ty(insn, 0) == input_ty);

            match input_ty {
                types::I64 => {
                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);

                    if is_signed {
                        ctx.emit(Inst::SMulWide { rn, rm });
                        ctx.emit(Inst::gen_move(rd, gpr(0), input_ty));
                    } else {
                        ctx.emit(Inst::gen_move(writable_gpr(1), rm, input_ty));
                        ctx.emit(Inst::UMulWide { rn });
                        ctx.emit(Inst::gen_move(rd, gpr(0), input_ty));
                    }
                }
                types::I32 => {
                    let narrow_mode = if is_signed {
                        NarrowValueMode::SignExtend64
                    } else {
                        NarrowValueMode::ZeroExtend64
                    };
                    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
                    let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
                    ctx.emit(Inst::AluRRR {
                        alu_op: ALUOp::Mul64,
                        rd,
                        rn,
                        rm,
                    });
                    let shift_op = if is_signed {
                        ShiftOp::AShR64
                    } else {
                        ShiftOp::LShR64
                    };
                    ctx.emit(Inst::ShiftRR {
                        shift_op,
                        rd,
                        rn: rd.to_reg(),
                        shift_imm: SImm20::maybe_from_i64(32).unwrap(),
                        shift_reg: None,
                    });
                }
                types::I16 | types::I8 => {
                    let narrow_mode = if is_signed {
                        NarrowValueMode::SignExtend32
                    } else {
                        NarrowValueMode::ZeroExtend32
                    };
                    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
                    let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
                    ctx.emit(Inst::AluRRR {
                        alu_op: ALUOp::Mul32,
                        rd,
                        rn,
                        rm,
                    });
                    let shift_op = if is_signed {
                        ShiftOp::AShR32
                    } else {
                        ShiftOp::LShR32
                    };
                    let shift_amt = match input_ty {
                        types::I16 => 16,
                        types::I8 => 8,
                        _ => unreachable!(),
                    };
                    ctx.emit(Inst::ShiftRR {
                        shift_op,
                        rd,
                        rn: rd.to_reg(),
                        shift_imm: SImm20::maybe_from_i64(shift_amt).unwrap(),
                        shift_reg: None,
                    });
                }
                _ => {
                    panic!("Unsupported argument type for umulhi/smulhi: {}", input_ty);
                }
            }
        }

        Opcode::Udiv | Opcode::Urem => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let ty = ty.unwrap();

            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            if ty_bits(ty) <= 32 {
                lower_constant_u32(ctx, writable_gpr(0), 0);
                if ty_bits(ty) < 32 {
                    ctx.emit(Inst::Extend {
                        rd: writable_gpr(1),
                        rn,
                        signed: false,
                        from_bits: ty_bits(ty) as u8,
                        to_bits: 32,
                    });
                } else {
                    ctx.emit(Inst::mov32(writable_gpr(1), rn));
                }
            } else {
                lower_constant_u64(ctx, writable_gpr(0), 0);
                ctx.emit(Inst::mov64(writable_gpr(1), rn));
            }

            let narrow_mode = if ty.bits() < 32 {
                NarrowValueMode::ZeroExtend32
            } else {
                NarrowValueMode::None
            };
            let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);

            if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() {
                ctx.emit(Inst::CmpTrapRSImm16 {
                    op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64),
                    rn: rm,
                    imm: 0,
                    cond: Cond::from_intcc(IntCC::Equal),
                    trap_code: TrapCode::IntegerDivisionByZero,
                });
            }

            if ty_bits(ty) <= 32 {
                ctx.emit(Inst::UDivMod32 { rn: rm });
            } else {
                ctx.emit(Inst::UDivMod64 { rn: rm });
            }

            if op == Opcode::Udiv {
                ctx.emit(Inst::gen_move(rd, gpr(1), ty));
            } else {
                ctx.emit(Inst::gen_move(rd, gpr(0), ty));
            }
        }

        Opcode::Sdiv | Opcode::Srem => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let ty = ty.unwrap();

            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            if ty_bits(ty) < 64 {
                ctx.emit(Inst::Extend {
                    rd: writable_gpr(1),
                    rn,
                    signed: true,
                    from_bits: ty_bits(ty) as u8,
                    to_bits: 64,
                });
            } else {
                ctx.emit(Inst::mov64(writable_gpr(1), rn));
            }

            let narrow_mode = if ty.bits() < 32 {
                NarrowValueMode::SignExtend32
            } else {
                NarrowValueMode::None
            };
            let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);

            if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() {
                ctx.emit(Inst::CmpTrapRSImm16 {
                    op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64),
                    rn: rm,
                    imm: 0,
                    cond: Cond::from_intcc(IntCC::Equal),
                    trap_code: TrapCode::IntegerDivisionByZero,
                });
            }

            if input_maybe_imm(ctx, inputs[1], 0xffff_ffff_ffff_ffff) {
                if op == Opcode::Sdiv {
                    let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
                    if ty_bits(ty) <= 32 {
                        lower_constant_u32(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1);
                    } else {
                        lower_constant_u64(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1);
                    }
                    ctx.emit(Inst::AluRRR {
                        alu_op: choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64),
                        rd: tmp,
                        rn: tmp.to_reg(),
                        rm: gpr(1),
                    });
                    ctx.emit(Inst::AluRRR {
                        alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64),
                        rd: tmp,
                        rn: tmp.to_reg(),
                        rm,
                    });
                    ctx.emit(Inst::CmpTrapRSImm16 {
                        op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64),
                        rn: tmp.to_reg(),
                        imm: -1,
                        cond: Cond::from_intcc(IntCC::Equal),
                        trap_code: TrapCode::IntegerOverflow,
                    });
                } else if ty_bits(ty) > 32 {
                    ctx.emit(Inst::CmpRSImm16 {
                        op: CmpOp::CmpS64,
                        rn: rm,
                        imm: -1,
                    });
                    ctx.emit(Inst::CMov64SImm16 {
                        rd: writable_gpr(1),
                        cond: Cond::from_intcc(IntCC::Equal),
                        imm: 0,
                    });
                }
            }

            if ty_bits(ty) <= 32 {
                ctx.emit(Inst::SDivMod32 { rn: rm });
            } else {
                ctx.emit(Inst::SDivMod64 { rn: rm });
            }

            if op == Opcode::Sdiv {
                ctx.emit(Inst::gen_move(rd, gpr(1), ty));
            } else {
                ctx.emit(Inst::gen_move(rd, gpr(0), ty));
            }
        }

        Opcode::Uextend | Opcode::Sextend => {
            let ty = ty.unwrap();
            let to_bits = ty_bits(ty) as u8;
            let to_bits = std::cmp::max(32, to_bits);
            let narrow_mode = match (op, to_bits) {
                (Opcode::Uextend, 32) => NarrowValueMode::ZeroExtend32,
                (Opcode::Uextend, 64) => NarrowValueMode::ZeroExtend64,
                (Opcode::Sextend, 32) => NarrowValueMode::SignExtend32,
                (Opcode::Sextend, 64) => NarrowValueMode::SignExtend64,
                _ => unreachable!(),
            };
            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            ctx.emit(Inst::gen_move(rd, rn, ty));
        }

        Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
            let ty = ty.unwrap();
            let size = ty_bits(ty);
            let narrow_mode = match (op, size) {
                (Opcode::Ishl, _) => NarrowValueMode::None,
                (Opcode::Ushr, 64) => NarrowValueMode::ZeroExtend64,
                (Opcode::Ushr, _) => NarrowValueMode::ZeroExtend32,
                (Opcode::Sshr, 64) => NarrowValueMode::SignExtend64,
                (Opcode::Sshr, _) => NarrowValueMode::SignExtend32,
                _ => unreachable!(),
            };
            let shift_op = match op {
                Opcode::Ishl => choose_32_64(ty, ShiftOp::LShL32, ShiftOp::LShL64),
                Opcode::Ushr => choose_32_64(ty, ShiftOp::LShR32, ShiftOp::LShR64),
                Opcode::Sshr => choose_32_64(ty, ShiftOp::AShR32, ShiftOp::AShR64),
                _ => unreachable!(),
            };
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
            if let Some(imm) = input_matches_const(ctx, inputs[1]) {
                let imm = imm & if size < 64 { 31 } else { 63 };
                let shift_imm = SImm20::maybe_from_i64(imm as i64).unwrap();
                let shift_reg = None;
                ctx.emit(Inst::ShiftRR {
                    shift_op,
                    rd,
                    rn,
                    shift_imm,
                    shift_reg,
                });
            } else {
                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                let shift_imm = SImm20::zero();
                let shift_reg = if size < 64 {
                    let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
                    ctx.emit(Inst::gen_move(tmp, rm, types::I64));
                    ctx.emit(Inst::AluRUImm16Shifted {
                        alu_op: ALUOp::And64,
                        rd: tmp,
                        imm: UImm16Shifted::maybe_from_u64(31).unwrap(),
                    });
                    Some(tmp.to_reg())
                } else {
                    Some(rm)
                };
                ctx.emit(Inst::ShiftRR {
                    shift_op,
                    rd,
                    rn,
                    shift_imm,
                    shift_reg,
                });
            }
        }

        Opcode::Rotr | Opcode::Rotl => {
            // s390x doesn't have a right-rotate instruction, but a right rotation of K places is
            // effectively a left rotation of N - K places, if N is the integer's bit size. We
            // implement right rotations with this trick.
            //
            // For a 32-bit or 64-bit rotate, we can use the rotate-left instruction (RLL / RLLG)
            // directly, negating the rotation amount for a rotate-right.
            //
            // For a < 32-bit rotate-right, we synthesize this as:
            //
            //    rotr rd, rn, rm
            //
            //       =>
            //
            //    zero-extend rn, <32-or-64>
            //    and tmp_masked_rm, rm, <bitwidth - 1>
            //    sub tmp1, tmp_masked_rm, <bitwidth>
            //    sub tmp1, zero, tmp1  ; neg
            //    lsr tmp2, rn, tmp_masked_rm
            //    lsl rd, rn, tmp1
            //    orr rd, rd, tmp2
            //
            // For a constant amount, we can instead do:
            //
            //    zero-extend rn, <32-or-64>
            //    lsr tmp2, rn, #<shiftimm>
            //    lsl rd, rn, <bitwidth - shiftimm>
            //    orr rd, rd, tmp2

1298             let is_rotr = op == Opcode::Rotr;
1299 
1300             let ty = ty.unwrap();
1301             let ty_bits_size = ty_bits(ty) as u64;
1302 
1303             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1304             let rn = put_input_in_reg(
1305                 ctx,
1306                 inputs[0],
1307                 if ty_bits_size <= 32 {
1308                     NarrowValueMode::ZeroExtend32
1309                 } else {
1310                     NarrowValueMode::ZeroExtend64
1311                 },
1312             );
1313 
1314             if ty_bits_size == 32 || ty_bits_size == 64 {
1315                 let shift_op = choose_32_64(ty, ShiftOp::RotL32, ShiftOp::RotL64);
1316                 if let Some(imm) = input_matches_const(ctx, inputs[1]) {
1317                     let shiftcount = imm & (ty_bits_size - 1);
1318                     let shiftcount = if is_rotr {
1319                         ty_bits_size - shiftcount
1320                     } else {
1321                         shiftcount
1322                     };
1323                     ctx.emit(Inst::ShiftRR {
1324                         shift_op,
1325                         rd,
1326                         rn,
1327                         shift_imm: SImm20::maybe_from_i64(shiftcount as i64).unwrap(),
1328                         shift_reg: None,
1329                     });
1330                 } else {
1331                     let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1332                     let rm = if is_rotr {
                        // Really ty_bits_size - rm, but the upper bits of the result are
                        // ignored (because of the implicit masking done by the instruction),
                        // so this is equivalent to negating the input.
1336                         let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64);
1337                         let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
1338                         ctx.emit(Inst::UnaryRR {
1339                             op,
1340                             rd: tmp,
1341                             rn: rm,
1342                         });
1343                         tmp.to_reg()
1344                     } else {
1345                         rm
1346                     };
1347                     ctx.emit(Inst::ShiftRR {
1348                         shift_op,
1349                         rd,
1350                         rn,
1351                         shift_imm: SImm20::zero(),
1352                         shift_reg: Some(rm),
1353                     });
1354                 }
1355             } else {
1356                 debug_assert!(ty_bits_size < 32);
1357 
1358                 if let Some(imm) = input_matches_const(ctx, inputs[1]) {
1359                     let rot_count = imm & (ty_bits_size - 1);
1360                     let (lshl_count, lshr_count) = if is_rotr {
1361                         (ty_bits_size - rot_count, rot_count)
1362                     } else {
1363                         (rot_count, ty_bits_size - rot_count)
1364                     };
1365 
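                    // rot(x) == (x << lshl_count) | (x >> lshr_count): the two
                    // counts sum to the type width, so the piece shifted out at
                    // one end is re-inserted at the other.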
1366                     let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
1367                     ctx.emit(Inst::ShiftRR {
1368                         shift_op: ShiftOp::LShL32,
1369                         rd: tmp1,
1370                         rn,
1371                         shift_imm: SImm20::maybe_from_i64(lshl_count as i64).unwrap(),
1372                         shift_reg: None,
1373                     });
1374 
1375                     let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
1376                     ctx.emit(Inst::ShiftRR {
1377                         shift_op: ShiftOp::LShR32,
1378                         rd: tmp2,
1379                         rn,
1380                         shift_imm: SImm20::maybe_from_i64(lshr_count as i64).unwrap(),
1381                         shift_reg: None,
1382                     });
1383 
1384                     ctx.emit(Inst::AluRRR {
1385                         alu_op: ALUOp::Orr32,
1386                         rd,
1387                         rn: tmp1.to_reg(),
1388                         rm: tmp2.to_reg(),
1389                     });
1390                 } else {
1391                     let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1392                     let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
1393                     let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
1394 
1395                     ctx.emit(Inst::mov32(tmp1, rm));
1396                     ctx.emit(Inst::UnaryRR {
1397                         op: UnaryOp::Neg32,
1398                         rd: tmp2,
1399                         rn: rm,
1400                     });
1401 
1402                     ctx.emit(Inst::AluRUImm16Shifted {
1403                         alu_op: ALUOp::And32,
1404                         rd: tmp1,
1405                         imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(),
1406                     });
1407                     ctx.emit(Inst::AluRUImm16Shifted {
1408                         alu_op: ALUOp::And32,
1409                         rd: tmp2,
1410                         imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(),
1411                     });
1412 
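                    // tmp1 now holds the masked rotate amount rm and tmp2 the
                    // masked negated amount -rm; they serve as the left- and
                    // right-shift counts whose results are ORed back together.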
                    let (lshl, lshr) = if is_rotr { (tmp2, tmp1) } else { (tmp1, tmp2) };
1414 
1415                     ctx.emit(Inst::ShiftRR {
1416                         shift_op: ShiftOp::LShL32,
1417                         rd: lshl,
1418                         rn,
1419                         shift_imm: SImm20::zero(),
1420                         shift_reg: Some(lshl.to_reg()),
1421                     });
1422 
1423                     ctx.emit(Inst::ShiftRR {
1424                         shift_op: ShiftOp::LShR32,
1425                         rd: lshr,
1426                         rn,
1427                         shift_imm: SImm20::zero(),
1428                         shift_reg: Some(lshr.to_reg()),
1429                     });
1430 
1431                     ctx.emit(Inst::AluRRR {
1432                         alu_op: ALUOp::Orr32,
1433                         rd,
1434                         rn: lshl.to_reg(),
1435                         rm: lshr.to_reg(),
1436                     });
1437                 }
1438             }
1439         }
1440 
1441         Opcode::Bnot => {
1442             let ty = ty.unwrap();
1443             let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
1444             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1445             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
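            // There is no plain NOT instruction; the OR-NOT (i.e. NOR) of a
            // value with itself computes its bitwise complement.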
1446             ctx.emit(Inst::AluRRR {
1447                 alu_op,
1448                 rd,
1449                 rn,
1450                 rm: rn,
1451             });
1452         }
1453 
1454         Opcode::Band => {
1455             let ty = ty.unwrap();
1456             let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64);
1457             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1458             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1459             if let Some(imm) = input_matches_uimm16shifted_inv(ctx, inputs[1]) {
1460                 ctx.emit(Inst::gen_move(rd, rn, ty));
1461                 ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm });
1462             } else if let Some(imm) = input_matches_uimm32shifted_inv(ctx, inputs[1]) {
1463                 ctx.emit(Inst::gen_move(rd, rn, ty));
1464                 ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm });
1465             } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
1466                 ctx.emit(Inst::gen_move(rd, rn, ty));
1467                 ctx.emit(Inst::AluRX { alu_op, rd, mem });
1468             } else {
1469                 let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1470                 ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
1471             }
1472         }
1473 
1474         Opcode::Bor => {
1475             let ty = ty.unwrap();
1476             let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64);
1477             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1478             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1479             if let Some(imm) = input_matches_uimm16shifted(ctx, inputs[1]) {
1480                 ctx.emit(Inst::gen_move(rd, rn, ty));
1481                 ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm });
1482             } else if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) {
1483                 ctx.emit(Inst::gen_move(rd, rn, ty));
1484                 ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm });
1485             } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
1486                 ctx.emit(Inst::gen_move(rd, rn, ty));
1487                 ctx.emit(Inst::AluRX { alu_op, rd, mem });
1488             } else {
1489                 let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1490                 ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
1491             }
1492         }
1493 
1494         Opcode::Bxor => {
1495             let ty = ty.unwrap();
1496             let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64);
1497             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1498             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1499             if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) {
1500                 ctx.emit(Inst::gen_move(rd, rn, ty));
1501                 ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm });
1502             } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
1503                 ctx.emit(Inst::gen_move(rd, rn, ty));
1504                 ctx.emit(Inst::AluRX { alu_op, rd, mem });
1505             } else {
1506                 let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1507                 ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
1508             }
1509         }
1510 
1511         Opcode::BandNot | Opcode::BorNot | Opcode::BxorNot => {
1512             let ty = ty.unwrap();
1513             let alu_op = match op {
1514                 Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
1515                 Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64),
1516                 Opcode::BxorNot => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64),
1517                 _ => unreachable!(),
1518             };
1519             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1520             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1521             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1522             ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
1523         }
1524 
1525         Opcode::Bitselect => {
1526             let ty = ty.unwrap();
1527             let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
1528             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1529             let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1530             let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1531             let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
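            // bitselect rcond, rn, rm == (rn & rcond) | (rm & !rcond):
            // compute the two masked halves, then OR them together.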
1532             ctx.emit(Inst::AluRRR {
1533                 alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64),
1534                 rd: tmp,
1535                 rn,
1536                 rm: rcond,
1537             });
1538             ctx.emit(Inst::AluRRR {
1539                 alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
1540                 rd,
1541                 rn: rm,
1542                 rm: rcond,
1543             });
1544             ctx.emit(Inst::AluRRR {
1545                 alu_op: choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
1546                 rd,
1547                 rn: rd.to_reg(),
1548                 rm: tmp.to_reg(),
1549             });
1550         }
1551 
1552         Opcode::Bextend | Opcode::Bmask => {
1553             // Bextend and Bmask both simply sign-extend. This works for:
1554             // - Bextend, because booleans are stored as 0 / -1, so we
1555             //   sign-extend the -1 to a -1 in the wider width.
1556             // - Bmask, because the resulting integer mask value must be
1557             //   all-ones (-1) if the argument is true.
1558             //
1559             // For a sign-extension from a 1-bit value (Case 1 below), we need
1560             // to do things a bit specially, because the ISA does not have a
1561             // 1-to-N-bit sign extension instruction.  For 8-bit or wider
1562             // sources (Case 2 below), we do a sign extension normally.
1563 
1564             let from_ty = ctx.input_ty(insn, 0);
1565             let to_ty = ctx.output_ty(insn, 0);
1566             let from_bits = ty_bits(from_ty);
1567             let to_bits = ty_bits(to_ty);
1568 
1569             assert!(
1570                 from_bits <= 64 && to_bits <= 64,
1571                 "Vector Bextend not supported yet"
1572             );
1573 
1574             if from_bits >= to_bits {
1575                 // Just a move.
1576                 let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1577                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1578                 let ty = ctx.input_ty(insn, 0);
1579                 ctx.emit(Inst::gen_move(rd, rn, ty));
1580             } else if from_bits == 1 {
1581                 assert!(to_bits >= 8);
1582                 // Case 1: 1-bit to N-bit extension: use a shift-left /
1583                 // shift-right sequence to create a 0 / -1 result.
1584                 let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1585                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1586                 let shl_op = choose_32_64(to_ty, ShiftOp::LShL32, ShiftOp::LShL64);
1587                 let shr_op = choose_32_64(to_ty, ShiftOp::AShR32, ShiftOp::AShR64);
1588                 let count = if to_bits > 32 { 63 } else { 31 };
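                // E.g. for a B1-to-I32 extension: (x << 31) >> 31 replicates
                // bit 0 across the whole 32-bit register, yielding 0 or -1.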
1589                 ctx.emit(Inst::ShiftRR {
1590                     shift_op: shl_op,
1591                     rd,
1592                     rn,
1593                     shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(),
1594                     shift_reg: None,
1595                 });
1596                 ctx.emit(Inst::ShiftRR {
1597                     shift_op: shr_op,
1598                     rd,
1599                     rn: rd.to_reg(),
1600                     shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(),
1601                     shift_reg: None,
1602                 });
1603             } else {
1604                 // Case 2: 8-or-more-bit to N-bit extension: just sign-extend. A
1605                 // `true` (all ones, or `-1`) will be extended to -1 with the
1606                 // larger width.
1607                 assert!(from_bits >= 8);
1608                 let narrow_mode = if to_bits == 64 {
1609                     NarrowValueMode::SignExtend64
1610                 } else {
1611                     assert!(to_bits <= 32);
1612                     NarrowValueMode::SignExtend32
1613                 };
1614                 let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
1615                 let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1616                 ctx.emit(Inst::gen_move(rd, rn, to_ty));
1617             }
1618         }
1619 
1620         Opcode::Bint => {
            // Booleans are stored as all-zeroes (0) or all-ones (-1). We mask
            // off all but the LSB to give a 0 / 1-valued integer result.
1623             let ty = ty.unwrap();
1624             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1625             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1626             if ty_bits(ty) <= 16 {
1627                 ctx.emit(Inst::gen_move(rd, rn, ty));
1628                 ctx.emit(Inst::AluRUImm16Shifted {
1629                     alu_op: ALUOp::And32,
1630                     rd,
1631                     imm: UImm16Shifted::maybe_from_u64(1).unwrap(),
1632                 });
1633             } else if ty_bits(ty) <= 32 {
1634                 ctx.emit(Inst::gen_move(rd, rn, ty));
1635                 ctx.emit(Inst::AluRUImm32Shifted {
1636                     alu_op: ALUOp::And32,
1637                     rd,
1638                     imm: UImm32Shifted::maybe_from_u64(1).unwrap(),
1639                 });
1640             } else {
1641                 let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
1642                 lower_constant_u64(ctx, tmp, 1);
1643                 ctx.emit(Inst::AluRRR {
1644                     alu_op: ALUOp::And64,
1645                     rd,
1646                     rn,
1647                     rm: tmp.to_reg(),
1648                 });
1649             }
1650         }
1651 
1652         Opcode::Clz => {
1653             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1654             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1655             let ty = ty.unwrap();
1656             let ty_bits_size = ty_bits(ty);
1657 
1658             let rn = if ty_bits_size < 64 {
1659                 let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
1660                 ctx.emit(Inst::Extend {
1661                     rd: tmp,
1662                     rn,
1663                     signed: false,
1664                     from_bits: ty_bits_size as u8,
1665                     to_bits: 64,
1666                 });
1667                 tmp.to_reg()
1668             } else {
1669                 rn
1670             };
1671 
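            // FLOGR leaves the number of leading zeros of the 64-bit operand
            // in GPR 0 (64 if the operand is zero).  The zero-extension above
            // introduced 64 - ty_bits_size extra leading zeros for narrower
            // types, which the adjustment below subtracts off again.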
1672             ctx.emit(Inst::Flogr { rn });
1673             ctx.emit(Inst::gen_move(rd, gpr(0), ty));
1674 
1675             if ty_bits_size < 64 {
1676                 ctx.emit(Inst::AluRSImm16 {
1677                     alu_op: ALUOp::Add32,
1678                     rd,
1679                     imm: -(64 - ty_bits_size as i16),
1680                 });
1681             }
1682         }
1683 
1684         Opcode::Cls => {
1685             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1686             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1687             let ty = ty.unwrap();
1688             let ty_bits_size = ty_bits(ty);
1689 
1690             let rn = if ty_bits_size < 64 {
1691                 let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
1692                 ctx.emit(Inst::Extend {
1693                     rd: tmp,
1694                     rn,
1695                     signed: true,
1696                     from_bits: ty_bits_size as u8,
1697                     to_bits: 64,
1698                 });
1699                 tmp.to_reg()
1700             } else {
1701                 rn
1702             };
1703 
            // tmp = rn ^ ((signed)rn >> 63)
            // The number of leading zeros of tmp is one greater than the
            // number of leading sign bits of rn, for any rn (including 0
            // and -1).
            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
            ctx.emit(Inst::ShiftRR {
                shift_op: ShiftOp::AShR64,
                rd: tmp,
                rn,
                shift_imm: SImm20::maybe_from_i64(63).unwrap(),
                shift_reg: None,
            });
            ctx.emit(Inst::AluRRR {
                alu_op: ALUOp::Xor64,
                rd: tmp,
                rn: tmp.to_reg(),
                rm: rn,
            });

            ctx.emit(Inst::Flogr { rn: tmp.to_reg() });
            ctx.emit(Inst::gen_move(rd, gpr(0), ty));

            // Correct for the off-by-one noted above, and compensate for
            // the sign extension to 64 bits in the case of narrower types.
            ctx.emit(Inst::AluRSImm16 {
                alu_op: ALUOp::Add32,
                rd,
                imm: -(64 - ty_bits_size as i16) - 1,
            });
1730         }
1731 
1732         Opcode::Ctz => {
1733             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1734             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1735             let ty = ty.unwrap();
1736             let ty_bits_size = ty_bits(ty);
1737 
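            // For types narrower than 64 bits, set the bit just above the
            // value first: a zero input then finds its lowest set bit at
            // position ty_bits_size, which is exactly the required result.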
1738             let rn = if ty_bits_size < 64 {
1739                 let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
1740                 ctx.emit(Inst::gen_move(tmp, rn, ty));
1741                 ctx.emit(Inst::AluRUImm16Shifted {
1742                     alu_op: ALUOp::Orr64,
1743                     rd: tmp,
1744                     imm: UImm16Shifted::maybe_from_u64(1u64 << ty_bits_size).unwrap(),
1745                 });
1746                 tmp.to_reg()
1747             } else {
1748                 rn
1749             };
1750 
1751             // tmp = rn & -rn
1752             let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
1753             ctx.emit(Inst::UnaryRR {
1754                 op: UnaryOp::Neg64,
1755                 rd: tmp,
1756                 rn,
1757             });
1758             ctx.emit(Inst::AluRRR {
1759                 alu_op: ALUOp::And64,
1760                 rd: tmp,
1761                 rn: tmp.to_reg(),
1762                 rm: rn,
1763             });
1764 
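            // tmp = rn & -rn isolates the lowest set bit of rn, say 1 << p.
            // FLOGR then leaves 63 - p in GPR 0, and the final subtraction
            // recovers p = 63 - (63 - p).  A zero 64-bit input finds no one
            // bit (CC 0), so force GPR 0 to -1 to make the result 64.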
1765             ctx.emit(Inst::Flogr { rn: tmp.to_reg() });
1766             if ty_bits_size == 64 {
1767                 ctx.emit(Inst::CMov64SImm16 {
1768                     rd: writable_gpr(0),
1769                     cond: Cond::from_intcc(IntCC::Equal),
1770                     imm: -1,
1771                 });
1772             }
1773 
1774             if ty_bits_size <= 32 {
1775                 lower_constant_u32(ctx, rd, 63);
1776             } else {
1777                 lower_constant_u64(ctx, rd, 63);
1778             }
1779             let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
1780             ctx.emit(Inst::AluRRR {
1781                 alu_op,
1782                 rd,
1783                 rn: rd.to_reg(),
1784                 rm: gpr(0),
1785             });
1786         }
1787 
1788         Opcode::Bitrev => unimplemented!(),
1789 
1790         Opcode::Popcnt => {
1791             let ty = ty.unwrap();
1792             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
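            // POPCNT computes a per-byte population count, so an 8-bit value
            // can use it directly; wider values are zero-extended and handled
            // by the full-register form.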
1793             if ty_bits(ty) <= 8 {
1794                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1795                 ctx.emit(Inst::UnaryRR {
1796                     op: UnaryOp::PopcntByte,
1797                     rd,
1798                     rn,
1799                 });
1800             } else {
1801                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
1802                 ctx.emit(Inst::UnaryRR {
1803                     op: UnaryOp::PopcntReg,
1804                     rd,
1805                     rn,
1806                 });
1807             }
1808         }
1809 
1810         Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
1811             let bits = ty_bits(ctx.output_ty(insn, 0));
1812             let fpu_op = match (op, bits) {
1813                 (Opcode::Fadd, 32) => FPUOp2::Add32,
1814                 (Opcode::Fadd, 64) => FPUOp2::Add64,
1815                 (Opcode::Fsub, 32) => FPUOp2::Sub32,
1816                 (Opcode::Fsub, 64) => FPUOp2::Sub64,
1817                 (Opcode::Fmul, 32) => FPUOp2::Mul32,
1818                 (Opcode::Fmul, 64) => FPUOp2::Mul64,
1819                 (Opcode::Fdiv, 32) => FPUOp2::Div32,
1820                 (Opcode::Fdiv, 64) => FPUOp2::Div64,
1821                 _ => panic!("Unknown op/bits combination"),
1822             };
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            ctx.emit(Inst::gen_move(rd, rn, ty.unwrap()));
            ctx.emit(Inst::FpuRRR { fpu_op, rd, rm });
1828         }
1829 
1830         Opcode::Fmin | Opcode::Fmax => {
1831             let bits = ty_bits(ctx.output_ty(insn, 0));
1832             let fpu_op = match (op, bits) {
1833                 (Opcode::Fmin, 32) => FPUOp2::Min32,
1834                 (Opcode::Fmin, 64) => FPUOp2::Min64,
1835                 (Opcode::Fmax, 32) => FPUOp2::Max32,
1836                 (Opcode::Fmax, 64) => FPUOp2::Max64,
1837                 _ => panic!("Unknown op/bits combination"),
1838             };
1839             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1840             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1841             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1842             ctx.emit(Inst::FpuVecRRR { fpu_op, rd, rn, rm });
1843         }
1844 
1845         Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
1846             let bits = ty_bits(ctx.output_ty(insn, 0));
1847             let fpu_op = match (op, bits) {
1848                 (Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
1849                 (Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
1850                 (Opcode::Fneg, 32) => FPUOp1::Neg32,
1851                 (Opcode::Fneg, 64) => FPUOp1::Neg64,
1852                 (Opcode::Fabs, 32) => FPUOp1::Abs32,
1853                 (Opcode::Fabs, 64) => FPUOp1::Abs64,
1854                 (Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"),
1855                 (Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
1856                 (Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
1857                 (Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
1858                 _ => panic!("Unknown op/bits combination"),
1859             };
1860             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1861             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1862             ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
1863         }
1864 
1865         Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {
1866             let bits = ty_bits(ctx.output_ty(insn, 0));
1867             let op = match (op, bits) {
1868                 (Opcode::Ceil, 32) => FpuRoundMode::Plus32,
1869                 (Opcode::Ceil, 64) => FpuRoundMode::Plus64,
1870                 (Opcode::Floor, 32) => FpuRoundMode::Minus32,
1871                 (Opcode::Floor, 64) => FpuRoundMode::Minus64,
1872                 (Opcode::Trunc, 32) => FpuRoundMode::Zero32,
1873                 (Opcode::Trunc, 64) => FpuRoundMode::Zero64,
1874                 (Opcode::Nearest, 32) => FpuRoundMode::Nearest32,
1875                 (Opcode::Nearest, 64) => FpuRoundMode::Nearest64,
1876                 _ => panic!("Unknown op/bits combination"),
1877             };
1878             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1879             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1880             ctx.emit(Inst::FpuRound { op, rd, rn });
1881         }
1882 
1883         Opcode::Fma => {
1884             let bits = ty_bits(ctx.output_ty(insn, 0));
1885             let fpu_op = match bits {
1886                 32 => FPUOp3::MAdd32,
1887                 64 => FPUOp3::MAdd64,
1888                 _ => panic!("Unknown op size"),
1889             };
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
            let ra = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            ctx.emit(Inst::gen_move(rd, ra, ty.unwrap()));
            ctx.emit(Inst::FpuRRRR { fpu_op, rd, rn, rm });
1896         }
1897 
1898         Opcode::Fcopysign => {
1899             let ty = ctx.output_ty(insn, 0);
1900             let bits = ty_bits(ty) as u8;
1901             assert!(bits == 32 || bits == 64);
1902             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1903             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
1904             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1905 
1906             ctx.emit(Inst::FpuCopysign { rd, rn, rm });
1907         }
1908 
1909         Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
1910             let in_bits = ty_bits(ctx.input_ty(insn, 0));
1911             let out_bits = ty_bits(ctx.output_ty(insn, 0));
1912             let signed = op == Opcode::FcvtFromSint;
1913             let op = match (signed, in_bits, out_bits) {
1914                 (false, 32, 32) => IntToFpuOp::U32ToF32,
1915                 (true, 32, 32) => IntToFpuOp::I32ToF32,
1916                 (false, 32, 64) => IntToFpuOp::U32ToF64,
1917                 (true, 32, 64) => IntToFpuOp::I32ToF64,
1918                 (false, 64, 32) => IntToFpuOp::U64ToF32,
1919                 (true, 64, 32) => IntToFpuOp::I64ToF32,
1920                 (false, 64, 64) => IntToFpuOp::U64ToF64,
1921                 (true, 64, 64) => IntToFpuOp::I64ToF64,
1922                 _ => panic!("Unknown input/output-bits combination"),
1923             };
1924             let narrow_mode = match (signed, in_bits) {
1925                 (false, 32) => NarrowValueMode::ZeroExtend32,
1926                 (true, 32) => NarrowValueMode::SignExtend32,
1927                 (false, 64) => NarrowValueMode::ZeroExtend64,
1928                 (true, 64) => NarrowValueMode::SignExtend64,
1929                 _ => panic!("Unknown input size"),
1930             };
1931             let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
1932             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1933             ctx.emit(Inst::IntToFpu { op, rd, rn });
1934         }
1935 
1936         Opcode::FcvtToUint | Opcode::FcvtToSint => {
1937             let in_bits = ty_bits(ctx.input_ty(insn, 0));
1938             let out_bits = ty_bits(ctx.output_ty(insn, 0));
1939             let signed = op == Opcode::FcvtToSint;
1940             let op = match (signed, in_bits, out_bits) {
1941                 (false, 32, 32) => FpuToIntOp::F32ToU32,
1942                 (true, 32, 32) => FpuToIntOp::F32ToI32,
1943                 (false, 32, 64) => FpuToIntOp::F32ToU64,
1944                 (true, 32, 64) => FpuToIntOp::F32ToI64,
1945                 (false, 64, 32) => FpuToIntOp::F64ToU32,
1946                 (true, 64, 32) => FpuToIntOp::F64ToI32,
1947                 (false, 64, 64) => FpuToIntOp::F64ToU64,
1948                 (true, 64, 64) => FpuToIntOp::F64ToI64,
1949                 _ => panic!("Unknown input/output-bits combination"),
1950             };
1951 
1952             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1953             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1954 
1955             // First, check whether the input is a NaN and trap if so.
1956             if in_bits == 32 {
1957                 ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
1958             } else {
1959                 ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
1960             }
1961             ctx.emit(Inst::TrapIf {
1962                 trap_code: TrapCode::BadConversionToInteger,
1963                 cond: Cond::from_floatcc(FloatCC::Unordered),
1964             });
1965 
            // Perform the conversion.  If this sets CC 3, we have a
            // "special case".  Since we already excluded the case where
            // the input was a NaN, the only other option is that the
            // conversion overflowed the target type.  (The `Unordered`
            // condition below is the mask that tests exactly CC 3.)
1970             ctx.emit(Inst::FpuToInt { op, rd, rn });
1971             ctx.emit(Inst::TrapIf {
1972                 trap_code: TrapCode::IntegerOverflow,
1973                 cond: Cond::from_floatcc(FloatCC::Unordered),
1974             });
1975         }
1976 
1977         Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
1978             let in_bits = ty_bits(ctx.input_ty(insn, 0));
1979             let out_bits = ty_bits(ctx.output_ty(insn, 0));
1980             let signed = op == Opcode::FcvtToSintSat;
1981             let op = match (signed, in_bits, out_bits) {
1982                 (false, 32, 32) => FpuToIntOp::F32ToU32,
1983                 (true, 32, 32) => FpuToIntOp::F32ToI32,
1984                 (false, 32, 64) => FpuToIntOp::F32ToU64,
1985                 (true, 32, 64) => FpuToIntOp::F32ToI64,
1986                 (false, 64, 32) => FpuToIntOp::F64ToU32,
1987                 (true, 64, 32) => FpuToIntOp::F64ToI32,
1988                 (false, 64, 64) => FpuToIntOp::F64ToU64,
1989                 (true, 64, 64) => FpuToIntOp::F64ToI64,
1990                 _ => panic!("Unknown input/output-bits combination"),
1991             };
1992 
1993             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
1994             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
1995 
1996             // Perform the conversion.
1997             ctx.emit(Inst::FpuToInt { op, rd, rn });
1998 
            // In most special cases, the Z instruction already yields the
            // result expected by Cranelift semantics.  The only exception
            // is the case where the input was a NaN.  We explicitly check
            // for that and force the output to 0 in that case.
2003             if in_bits == 32 {
2004                 ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
2005             } else {
2006                 ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
2007             }
2008             let cond = Cond::from_floatcc(FloatCC::Unordered);
2009             if out_bits <= 32 {
2010                 ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 0 });
2011             } else {
2012                 ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: 0 });
2013             }
2014         }
2015 
2016         Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
2017 
2018         Opcode::Bitcast => {
2019             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2020             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
2021             let input_ty = ctx.input_ty(insn, 0);
2022             let output_ty = ctx.output_ty(insn, 0);
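            // 64-bit bitcasts are plain register-to-register moves.  In the
            // 32-bit case, note that short floats live in the high half of a
            // floating-point register, so the value must also be shifted up
            // or down by 32 bits on its way through the 64-bit move.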
2023             match (input_ty, output_ty) {
2024                 (types::I64, types::F64) => {
2025                     ctx.emit(Inst::MovToFpr { rd, rn });
2026                 }
2027                 (types::F64, types::I64) => {
2028                     ctx.emit(Inst::MovFromFpr { rd, rn });
2029                 }
2030                 (types::I32, types::F32) => {
2031                     let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
2032                     ctx.emit(Inst::ShiftRR {
2033                         shift_op: ShiftOp::LShL64,
2034                         rd: tmp,
2035                         rn,
2036                         shift_imm: SImm20::maybe_from_i64(32).unwrap(),
2037                         shift_reg: None,
2038                     });
2039                     ctx.emit(Inst::MovToFpr {
2040                         rd,
2041                         rn: tmp.to_reg(),
2042                     });
2043                 }
2044                 (types::F32, types::I32) => {
2045                     let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
2046                     ctx.emit(Inst::MovFromFpr { rd: tmp, rn });
2047                     ctx.emit(Inst::ShiftRR {
2048                         shift_op: ShiftOp::LShR64,
2049                         rd,
2050                         rn: tmp.to_reg(),
2051                         shift_imm: SImm20::maybe_from_i64(32).unwrap(),
2052                         shift_reg: None,
2053                     });
2054                 }
2055                 _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
2056             }
2057         }
2058 
2059         Opcode::Load
2060         | Opcode::Uload8
2061         | Opcode::Sload8
2062         | Opcode::Uload16
2063         | Opcode::Sload16
2064         | Opcode::Uload32
2065         | Opcode::Sload32
2066         | Opcode::LoadComplex
2067         | Opcode::Uload8Complex
2068         | Opcode::Sload8Complex
2069         | Opcode::Uload16Complex
2070         | Opcode::Sload16Complex
2071         | Opcode::Uload32Complex
2072         | Opcode::Sload32Complex => {
2073             let off = ctx.data(insn).load_store_offset().unwrap();
2074             let flags = ctx.memflags(insn).unwrap();
2075             let endianness = flags.endianness(Endianness::Big);
2076             let elem_ty = ctx.output_ty(insn, 0);
2077             let is_float = ty_is_float(elem_ty);
2078             let to_bits = ty_bits(elem_ty);
2079             let from_bits = match op {
2080                 Opcode::Load | Opcode::LoadComplex => to_bits,
2081                 Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
2082                     8
2083                 }
2084                 Opcode::Sload16
2085                 | Opcode::Uload16
2086                 | Opcode::Sload16Complex
2087                 | Opcode::Uload16Complex => 16,
2088                 Opcode::Sload32
2089                 | Opcode::Uload32
2090                 | Opcode::Sload32Complex
2091                 | Opcode::Uload32Complex => 32,
2092                 _ => unreachable!(),
2093             };
2094             let ext_bits = if to_bits < 32 { 32 } else { to_bits };
2095             let sign_extend = match op {
2096                 Opcode::Sload8
2097                 | Opcode::Sload8Complex
2098                 | Opcode::Sload16
2099                 | Opcode::Sload16Complex
2100                 | Opcode::Sload32
2101                 | Opcode::Sload32Complex => true,
2102                 _ => false,
2103             };
2104 
2105             let mem = lower_address(ctx, &inputs[..], off, flags);
2106             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2107 
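            // Big-endian loads can fold the extension into the load itself.
            // The byte-reversed (little-endian) loads cannot, so 16- and
            // 32-bit little-endian sources are loaded byte-reversed first
            // and extended separately below; single bytes need no reversal.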
2108             if endianness == Endianness::Big {
2109                 ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) {
2110                     (32, 32, _, true) => Inst::FpuLoad32 { rd, mem },
2111                     (64, 64, _, true) => Inst::FpuLoad64 { rd, mem },
2112                     (32, 32, _, false) => Inst::Load32 { rd, mem },
2113                     (64, 64, _, false) => Inst::Load64 { rd, mem },
2114                     (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem },
2115                     (32, 8, true, _) => Inst::Load32SExt8 { rd, mem },
2116                     (32, 16, false, _) => Inst::Load32ZExt16 { rd, mem },
2117                     (32, 16, true, _) => Inst::Load32SExt16 { rd, mem },
2118                     (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem },
2119                     (64, 8, true, _) => Inst::Load64SExt8 { rd, mem },
2120                     (64, 16, false, _) => Inst::Load64ZExt16 { rd, mem },
2121                     (64, 16, true, _) => Inst::Load64SExt16 { rd, mem },
2122                     (64, 32, false, _) => Inst::Load64ZExt32 { rd, mem },
2123                     (64, 32, true, _) => Inst::Load64SExt32 { rd, mem },
2124                     _ => panic!("Unsupported size in load"),
2125                 });
2126             } else {
2127                 ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) {
2128                     (32, 32, _, true) => Inst::FpuLoadRev32 { rd, mem },
2129                     (64, 64, _, true) => Inst::FpuLoadRev64 { rd, mem },
2130                     (_, 16, _, false) => Inst::LoadRev16 { rd, mem },
2131                     (_, 32, _, false) => Inst::LoadRev32 { rd, mem },
2132                     (_, 64, _, false) => Inst::LoadRev64 { rd, mem },
2133                     (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem },
2134                     (32, 8, true, _) => Inst::Load32SExt8 { rd, mem },
2135                     (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem },
2136                     (64, 8, true, _) => Inst::Load64SExt8 { rd, mem },
2137                     _ => panic!("Unsupported size in load"),
2138                 });
2139                 if to_bits > from_bits && from_bits > 8 {
                    assert!(!is_float);
2141                     ctx.emit(Inst::Extend {
2142                         rd,
2143                         rn: rd.to_reg(),
2144                         signed: sign_extend,
2145                         from_bits: from_bits as u8,
2146                         to_bits: to_bits as u8,
2147                     });
2148                 }
2149             }
2150         }
2151 
2152         Opcode::Store
2153         | Opcode::Istore8
2154         | Opcode::Istore16
2155         | Opcode::Istore32
2156         | Opcode::StoreComplex
2157         | Opcode::Istore8Complex
2158         | Opcode::Istore16Complex
2159         | Opcode::Istore32Complex => {
2160             let off = ctx.data(insn).load_store_offset().unwrap();
2161             let flags = ctx.memflags(insn).unwrap();
2162             let endianness = flags.endianness(Endianness::Big);
2163             let elem_ty = match op {
2164                 Opcode::Istore8 | Opcode::Istore8Complex => types::I8,
2165                 Opcode::Istore16 | Opcode::Istore16Complex => types::I16,
2166                 Opcode::Istore32 | Opcode::Istore32Complex => types::I32,
2167                 Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
2168                 _ => unreachable!(),
2169             };
2170 
2171             let mem = lower_address(ctx, &inputs[1..], off, flags);
2172 
2173             if ty_is_float(elem_ty) {
2174                 let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
2175                 ctx.emit(match (endianness, ty_bits(elem_ty)) {
2176                     (Endianness::Big, 32) => Inst::FpuStore32 { rd, mem },
2177                     (Endianness::Big, 64) => Inst::FpuStore64 { rd, mem },
2178                     (Endianness::Little, 32) => Inst::FpuStoreRev32 { rd, mem },
2179                     (Endianness::Little, 64) => Inst::FpuStoreRev64 { rd, mem },
2180                     _ => panic!("Unsupported size in store"),
2181                 });
2182             } else if ty_bits(elem_ty) <= 16 {
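                // Byte-sized stores need no byte reversal, and a 16-bit
                // immediate store on a little-endian target can simply swap
                // the immediate at compile time.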
2183                 if let Some(imm) = input_matches_const(ctx, inputs[0]) {
2184                     ctx.emit(match (endianness, ty_bits(elem_ty)) {
2185                         (_, 1) | (_, 8) => Inst::StoreImm8 {
2186                             imm: imm as u8,
2187                             mem,
2188                         },
2189                         (Endianness::Big, 16) => Inst::StoreImm16 {
2190                             imm: imm as i16,
2191                             mem,
2192                         },
2193                         (Endianness::Little, 16) => Inst::StoreImm16 {
2194                             imm: (imm as i16).swap_bytes(),
2195                             mem,
2196                         },
2197                         _ => panic!("Unsupported size in store"),
2198                     });
2199                 } else {
2200                     let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
2201                     ctx.emit(match (endianness, ty_bits(elem_ty)) {
2202                         (_, 1) | (_, 8) => Inst::Store8 { rd, mem },
2203                         (Endianness::Big, 16) => Inst::Store16 { rd, mem },
2204                         (Endianness::Little, 16) => Inst::StoreRev16 { rd, mem },
2205                         _ => panic!("Unsupported size in store"),
2206                     });
2207                 }
2208             } else if endianness == Endianness::Big {
2209                 if let Some(imm) = input_matches_simm16(ctx, inputs[0]) {
2210                     ctx.emit(match ty_bits(elem_ty) {
2211                         32 => Inst::StoreImm32SExt16 { imm, mem },
2212                         64 => Inst::StoreImm64SExt16 { imm, mem },
2213                         _ => panic!("Unsupported size in store"),
2214                     });
2215                 } else {
2216                     let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
2217                     ctx.emit(match ty_bits(elem_ty) {
2218                         32 => Inst::Store32 { rd, mem },
2219                         64 => Inst::Store64 { rd, mem },
2220                         _ => panic!("Unsupported size in store"),
2221                     });
2222                 }
2223             } else {
2224                 let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
2225                 ctx.emit(match ty_bits(elem_ty) {
2226                     32 => Inst::StoreRev32 { rd, mem },
2227                     64 => Inst::StoreRev64 { rd, mem },
2228                     _ => panic!("Unsupported size in store"),
2229                 });
2230             }
2231         }
2232 
2233         Opcode::StackLoad | Opcode::StackStore => {
2234             panic!("Direct stack memory access not supported; should not be used by Wasm");
2235         }
2236 
2237         Opcode::StackAddr => {
2238             let (stack_slot, offset) = match *ctx.data(insn) {
2239                 InstructionData::StackLoad {
2240                     opcode: Opcode::StackAddr,
2241                     stack_slot,
2242                     offset,
2243                 } => (stack_slot, offset),
2244                 _ => unreachable!(),
2245             };
2246             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2247             let offset: i32 = offset.into();
2248             let inst = ctx
2249                 .abi()
2250                 .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd);
2251             ctx.emit(inst);
2252         }
2253 
2254         Opcode::ConstAddr => unimplemented!(),
2255 
2256         Opcode::FuncAddr => {
2257             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2258             let (extname, dist) = ctx.call_target(insn).unwrap();
2259             let extname = extname.clone();
2260             if dist == RelocDistance::Near {
2261                 ctx.emit(Inst::LoadAddr {
2262                     rd,
2263                     mem: MemArg::Symbol {
2264                         name: Box::new(extname),
2265                         offset: 0,
2266                         flags: MemFlags::trusted(),
2267                     },
2268                 });
2269             } else {
2270                 ctx.emit(Inst::LoadExtNameFar {
2271                     rd,
2272                     name: Box::new(extname),
2273                     offset: 0,
2274                 });
2275             }
2276         }
2277 
2278         Opcode::SymbolValue => {
2279             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2280             let (extname, dist, offset) = ctx.symbol_value(insn).unwrap();
2281             let extname = extname.clone();
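            // The PC-relative form requires an even offset (the target
            // address must stay halfword-aligned) within signed 32-bit
            // range; otherwise the full 64-bit address is loaded instead.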
2282             if dist == RelocDistance::Near && (offset & 1) == 0 && i32::try_from(offset).is_ok() {
2283                 ctx.emit(Inst::LoadAddr {
2284                     rd,
2285                     mem: MemArg::Symbol {
2286                         name: Box::new(extname),
2287                         offset: i32::try_from(offset).unwrap(),
2288                         flags: MemFlags::trusted(),
2289                     },
2290                 });
2291             } else {
2292                 ctx.emit(Inst::LoadExtNameFar {
2293                     rd,
2294                     name: Box::new(extname),
2295                     offset,
2296                 });
2297             }
2298         }
2299 
2300         Opcode::HeapAddr => {
2301             panic!("heap_addr should have been removed by legalization!");
2302         }
2303 
2304         Opcode::TableAddr => {
2305             panic!("table_addr should have been removed by legalization!");
2306         }
2307 
2308         Opcode::GlobalValue => {
2309             panic!("global_value should have been removed by legalization!");
2310         }
2311 
2312         Opcode::TlsValue => {
2313             panic!("Thread-local storage support not implemented!");
2314         }
2315 
2316         Opcode::GetPinnedReg | Opcode::SetPinnedReg => {
2317             panic!("Pinned register support not implemented!");
2318         }
2319 
2320         Opcode::Icmp => {
2321             let condcode = ctx.data(insn).cond_code().unwrap();
2322             let cond = Cond::from_intcc(condcode);
2323             let is_signed = condcode_is_signed(condcode);
2324             lower_icmp_to_flags(ctx, insn, is_signed, true);
2325 
2326             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2327             let ty = ctx.output_ty(insn, 0);
2328             lower_flags_to_bool_result(ctx, cond, rd, ty);
2329         }
2330 
2331         Opcode::Fcmp => {
2332             let condcode = ctx.data(insn).fp_cond_code().unwrap();
2333             let cond = Cond::from_floatcc(condcode);
2334             lower_fcmp_to_flags(ctx, insn);
2335 
2336             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2337             let ty = ctx.output_ty(insn, 0);
2338             lower_flags_to_bool_result(ctx, cond, rd, ty);
2339         }
2340 
2341         Opcode::IsNull | Opcode::IsInvalid => {
2342             // Null references are represented by the constant value 0; invalid
2343             // references are represented by the constant value -1.
2344             let cond = Cond::from_intcc(IntCC::Equal);
2345             let imm = match op {
2346                 Opcode::IsNull => 0,
2347                 Opcode::IsInvalid => -1,
2348                 _ => unreachable!(),
2349             };
2350             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
2351             ctx.emit(Inst::CmpRSImm16 {
2352                 op: CmpOp::CmpS64,
2353                 rn,
2354                 imm,
2355             });
2356 
2357             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2358             let ty = ctx.output_ty(insn, 0);
2359             lower_flags_to_bool_result(ctx, cond, rd, ty);
2360         }
2361 
2362         Opcode::Select => {
2363             let ty = ctx.output_ty(insn, 0);
2364             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2365             let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
2366             let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
2367             let cond = lower_boolean_to_flags(ctx, inputs[0]);
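            // Materialize the false operand in rd, then conditionally
            // overwrite it with the true operand.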
2368             ctx.emit(Inst::gen_move(rd, rm, ty));
2369             if ty_is_float(ty) {
2370                 if ty_bits(ty) < 64 {
2371                     ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn });
2372                 } else {
2373                     ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn });
2374                 }
2375             } else {
2376                 if ty_bits(ty) < 64 {
2377                     ctx.emit(Inst::CMov32 { rd, cond, rm: rn });
2378                 } else {
2379                     ctx.emit(Inst::CMov64 { rd, cond, rm: rn });
2380                 }
2381             }
2382         }
2383 
2384         Opcode::SelectifSpectreGuard => {
2385             let ty = ctx.output_ty(insn, 0);
2386             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
2387             let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
2388             let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
2389             let condcode = ctx.data(insn).cond_code().unwrap();
2390             let cond = Cond::from_intcc(condcode);
2391             let is_signed = condcode_is_signed(condcode);
2392 
2393             // Verification ensures that the input is always a single-def ifcmp.
2394             let cmp_insn = ctx
2395                 .get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
2396                 .inst
2397                 .unwrap()
2398                 .0;
2399             debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
2400             lower_icmp_to_flags(ctx, cmp_insn, is_signed, true);
2401 
2402             ctx.emit(Inst::gen_move(rd, rm, ty));
2403             if ty_is_float(ty) {
2404                 if ty_bits(ty) < 64 {
2405                     ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn });
2406                 } else {
2407                     ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn });
2408                 }
2409             } else {
2410                 if ty_bits(ty) < 64 {
2411                     ctx.emit(Inst::CMov32 { rd, cond, rm: rn });
2412                 } else {
2413                     ctx.emit(Inst::CMov64 { rd, cond, rm: rn });
2414                 }
2415             }
2416         }
2417 
2418         Opcode::Trap | Opcode::ResumableTrap => {
2419             let trap_code = ctx.data(insn).trap_code().unwrap();
2420             ctx.emit_safepoint(Inst::Trap { trap_code })
2421         }
2422 
2423         Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => {
2424             let cond = lower_boolean_to_flags(ctx, inputs[0]);
2425             let negated = op == Opcode::Trapz;
2426             let cond = if negated { cond.invert() } else { cond };
2427             let trap_code = ctx.data(insn).trap_code().unwrap();
2428             ctx.emit_safepoint(Inst::TrapIf { trap_code, cond });
2429         }
2430 
2431         Opcode::Trapif => {
2432             let condcode = ctx.data(insn).cond_code().unwrap();
2433             let cond = Cond::from_intcc(condcode);
2434             let is_signed = condcode_is_signed(condcode);
2435 
2436             // Verification ensures that the input is always a single-def ifcmp.
2437             let cmp_insn = ctx
2438                 .get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
2439                 .inst
2440                 .unwrap()
2441                 .0;
2442             debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
2443             lower_icmp_to_flags(ctx, cmp_insn, is_signed, true);
2444 
2445             let trap_code = ctx.data(insn).trap_code().unwrap();
2446             ctx.emit_safepoint(Inst::TrapIf { trap_code, cond });
2447         }
2448 
2449         Opcode::Debugtrap => {
2450             ctx.emit(Inst::Debugtrap);
2451         }
2452 
2453         Opcode::Call | Opcode::CallIndirect => {
2454             let caller_conv = ctx.abi().call_conv();
2455             let (mut abi, inputs) = match op {
2456                 Opcode::Call => {
2457                     let (extname, dist) = ctx.call_target(insn).unwrap();
2458                     let extname = extname.clone();
2459                     let sig = ctx.call_sig(insn).unwrap();
2460                     assert!(inputs.len() == sig.params.len());
2461                     assert!(outputs.len() == sig.returns.len());
2462                     (
2463                         S390xABICaller::from_func(sig, &extname, dist, caller_conv, flags)?,
2464                         &inputs[..],
2465                     )
2466                 }
2467                 Opcode::CallIndirect => {
2468                     let ptr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
2469                     let sig = ctx.call_sig(insn).unwrap();
2470                     assert!(inputs.len() - 1 == sig.params.len());
2471                     assert!(outputs.len() == sig.returns.len());
2472                     (
2473                         S390xABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?,
2474                         &inputs[1..],
2475                     )
2476                 }
2477                 _ => unreachable!(),
2478             };
2479 
            assert!(inputs.len() == abi.num_args());
            for (i, input) in inputs.iter().enumerate() {
                let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
                abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg));
            }
            abi.emit_call(ctx);
            for (i, output) in outputs.iter().enumerate() {
                let retval_reg = get_output_reg(ctx, *output).only_reg().unwrap();
                abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(retval_reg));
            }
            abi.accumulate_outgoing_args_size(ctx);
        }

        Opcode::FallthroughReturn | Opcode::Return => {
            for (i, input) in inputs.iter().enumerate() {
                let reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
                let retval_reg = ctx.retval(i).only_reg().unwrap();
                let ty = ctx.input_ty(insn, i);
                ctx.emit(Inst::gen_move(retval_reg, reg, ty));
            }
            // N.B.: the Ret itself is generated by the ABI.
        }

        Opcode::AtomicRmw
        | Opcode::AtomicCas
        | Opcode::AtomicLoad
        | Opcode::AtomicStore
        | Opcode::Fence => {
            // TODO
            panic!("Atomic operations not implemented");
        }

        Opcode::RawBitcast
        | Opcode::Splat
        | Opcode::Swizzle
        | Opcode::Insertlane
        | Opcode::Extractlane
        | Opcode::Imin
        | Opcode::Umin
        | Opcode::Imax
        | Opcode::Umax
        | Opcode::AvgRound
        | Opcode::FminPseudo
        | Opcode::FmaxPseudo
        | Opcode::Uload8x8
        | Opcode::Uload8x8Complex
        | Opcode::Sload8x8
        | Opcode::Sload8x8Complex
        | Opcode::Uload16x4
        | Opcode::Uload16x4Complex
        | Opcode::Sload16x4
        | Opcode::Sload16x4Complex
        | Opcode::Uload32x2
        | Opcode::Uload32x2Complex
        | Opcode::Sload32x2
        | Opcode::Sload32x2Complex
        | Opcode::Vconst
        | Opcode::Shuffle
        | Opcode::Vsplit
        | Opcode::Vconcat
        | Opcode::Vselect
        | Opcode::VanyTrue
        | Opcode::VallTrue
        | Opcode::VhighBits
        | Opcode::ScalarToVector
        | Opcode::Snarrow
        | Opcode::Unarrow
        | Opcode::SwidenLow
        | Opcode::SwidenHigh
        | Opcode::UwidenLow
        | Opcode::UwidenHigh
        | Opcode::WideningPairwiseDotProductS => {
            // TODO
            panic!("Vector ops not implemented.");
        }

        Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."),

        Opcode::Spill
        | Opcode::Fill
        | Opcode::FillNop
        | Opcode::Regmove
        | Opcode::CopySpecial
        | Opcode::CopyToSsa
        | Opcode::CopyNop
        | Opcode::AdjustSpDown
        | Opcode::AdjustSpUpImm
        | Opcode::AdjustSpDownImm
        | Opcode::DummySargT
        | Opcode::IfcmpSp
        | Opcode::Regspill
        | Opcode::Regfill => {
            panic!("Unused opcode should not be encountered.");
        }

        Opcode::Ifcmp
        | Opcode::Ffcmp
        | Opcode::Trapff
        | Opcode::Trueif
        | Opcode::Trueff
        | Opcode::Selectif => {
            panic!("Flags opcode should not be encountered.");
        }

        Opcode::Jump
        | Opcode::Fallthrough
        | Opcode::Brz
        | Opcode::Brnz
        | Opcode::BrIcmp
        | Opcode::Brif
        | Opcode::Brff
        | Opcode::IndirectJumpTableBr
        | Opcode::BrTable => {
            panic!("Branch opcode reached non-branch lowering logic!");
        }

        Opcode::JumpTableEntry | Opcode::JumpTableBase => {
            panic!("Should not appear: we handle BrTable directly");
        }

        Opcode::Safepoint => {
            panic!("safepoint instructions not used by new backend's safepoints!");
        }

        Opcode::IaddImm
        | Opcode::ImulImm
        | Opcode::UdivImm
        | Opcode::SdivImm
        | Opcode::UremImm
        | Opcode::SremImm
        | Opcode::IrsubImm
        | Opcode::IaddCin
        | Opcode::IaddIfcin
        | Opcode::IaddCout
        | Opcode::IaddIfcout
        | Opcode::IaddCarry
        | Opcode::IaddIfcarry
        | Opcode::IsubBin
        | Opcode::IsubIfbin
        | Opcode::IsubBout
        | Opcode::IsubIfbout
        | Opcode::IsubBorrow
        | Opcode::IsubIfborrow
        | Opcode::BandImm
        | Opcode::BorImm
        | Opcode::BxorImm
        | Opcode::RotlImm
        | Opcode::RotrImm
        | Opcode::IshlImm
        | Opcode::UshrImm
        | Opcode::SshrImm
        | Opcode::IcmpImm
        | Opcode::IfcmpImm => {
            panic!("ALU+imm and ALU+carry ops should not appear here!");
        }

        #[cfg(feature = "x86")]
        Opcode::X86Udivmodx
        | Opcode::X86Sdivmodx
        | Opcode::X86Umulx
        | Opcode::X86Smulx
        | Opcode::X86Cvtt2si
        | Opcode::X86Fmin
        | Opcode::X86Fmax
        | Opcode::X86Push
        | Opcode::X86Pop
        | Opcode::X86Bsr
        | Opcode::X86Bsf
        | Opcode::X86Pblendw
        | Opcode::X86Pshufd
        | Opcode::X86Pshufb
        | Opcode::X86Pextr
        | Opcode::X86Pinsr
        | Opcode::X86Insertps
        | Opcode::X86Movsd
        | Opcode::X86Movlhps
        | Opcode::X86Psll
        | Opcode::X86Psrl
        | Opcode::X86Psra
        | Opcode::X86Ptest
        | Opcode::X86Pmaxs
        | Opcode::X86Pmaxu
        | Opcode::X86Pmins
        | Opcode::X86Pminu
        | Opcode::X86Pmullq
        | Opcode::X86Pmuludq
        | Opcode::X86Punpckh
        | Opcode::X86Punpckl
        | Opcode::X86Vcvtudq2ps
        | Opcode::X86Palignr
        | Opcode::X86ElfTlsGetAddr
        | Opcode::X86MachoTlsGetAddr => {
            panic!("x86-specific opcode in supposedly arch-neutral IR!");
        }
    }

    Ok(())
}

//============================================================================
// Lowering: main entry point for lowering a branch group

fn lower_branch<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    branches: &[IRInst],
    targets: &[MachLabel],
) -> CodegenResult<()> {
    // A block should end with at most two branches. The first may be a
    // conditional branch; a conditional branch can be followed only by an
    // unconditional branch or fallthrough. Otherwise, if only one branch,
    // it may be an unconditional branch, a fallthrough, a return, or a
    // trap. These conditions are verified by `is_ebb_basic()` during the
    // verifier pass.
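    // For example: `brnz v0, block1` followed by `jump block2`.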
    assert!(branches.len() <= 2);

    if branches.len() == 2 {
        // Must be a conditional branch followed by an unconditional branch.
        let op0 = ctx.data(branches[0]).opcode();
        let op1 = ctx.data(branches[1]).opcode();

        assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
        let taken = BranchTarget::Label(targets[0]);
        let not_taken = BranchTarget::Label(targets[1]);

        match op0 {
            Opcode::Brz | Opcode::Brnz => {
                let flag_input = InsnInput {
                    insn: branches[0],
                    input: 0,
                };
                let cond = lower_boolean_to_flags(ctx, flag_input);
                let negated = op0 == Opcode::Brz;
                let cond = if negated { cond.invert() } else { cond };
                ctx.emit(Inst::CondBr {
                    taken,
                    not_taken,
                    cond,
                });
            }

            Opcode::Brif => {
                let condcode = ctx.data(branches[0]).cond_code().unwrap();
                let cond = Cond::from_intcc(condcode);
                let is_signed = condcode_is_signed(condcode);

                // Verification ensures that the input is always a single-def ifcmp.
                let cmp_insn = ctx
                    .get_input_as_source_or_const(branches[0], 0)
                    .inst
                    .unwrap()
                    .0;
                debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
                lower_icmp_to_flags(ctx, cmp_insn, is_signed, true);

                ctx.emit(Inst::CondBr {
                    taken,
                    not_taken,
                    cond,
                });
            }

            Opcode::Brff => unreachable!(),

            _ => unimplemented!(),
        }
    } else {
        // Must be an unconditional branch or an indirect branch.
        let op = ctx.data(branches[0]).opcode();
        match op {
            Opcode::Jump | Opcode::Fallthrough => {
                assert!(branches.len() == 1);
                // In the Fallthrough case, the machine-independent driver
                // fills in `targets[0]` with our fallthrough block, so this
                // is valid for both Jump and Fallthrough.
                ctx.emit(Inst::Jump {
                    dest: BranchTarget::Label(targets[0]),
                });
            }

            Opcode::BrTable => {
                let jt_size = targets.len() - 1;
                assert!(jt_size <= std::u32::MAX as usize);

                // Load up jump table element index.
                let ridx = put_input_in_reg(
                    ctx,
                    InsnInput {
                        insn: branches[0],
                        input: 0,
                    },
                    NarrowValueMode::ZeroExtend64,
                );

                // Temp registers needed by the compound instruction.
                let rtmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
                let rtmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();

                // Emit the compound instruction that does:
                //
                // clgfi %rIdx, <jt-size>
                // jghe <default-target>
                // sllg %rTmp2, %rIdx, 2
                // larl %rTmp1, <jt-base>
                // lgf %rTmp2, 0(%rTmp2, %rTmp1)
                // agrk %rTmp1, %rTmp1, %rTmp2
                // br %rTmp1
                // [jt entries]
                //
                // This must be *one* instruction in the vcode because
                // we cannot allow regalloc to insert any spills/fills
                // in the middle of the sequence; otherwise, the larl's
                // PC-rel offset to the jumptable would be incorrect.
                // (The alternative is to introduce a relocation pass
                // for inlined jumptables, which is much worse, IMHO.)

                let default_target = BranchTarget::Label(targets[0]);
                let jt_targets: Vec<BranchTarget> = targets
                    .iter()
                    .skip(1)
                    .map(|bix| BranchTarget::Label(*bix))
                    .collect();
                let targets_for_term: Vec<MachLabel> = targets.to_vec();
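                // targets[0] is the default (out-of-range) target; the
                // remaining labels form the table entries. Per the sequence
                // above, each entry is a 32-bit offset from the table base:
                // the index is scaled by 4 (sllg by 2), loaded with a
                // sign-extending lgf, and added to the base to form the
                // branch target.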
                ctx.emit(Inst::JTSequence {
                    ridx,
                    rtmp1,
                    rtmp2,
                    info: Box::new(JTSequenceInfo {
                        default_target,
                        targets: jt_targets,
                        targets_for_term,
                    }),
                });
            }

            _ => panic!("Unknown branch type!"),
        }
    }

    Ok(())
}

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for S390xBackend {
    type MInst = Inst;

    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
        lower_insn_to_regs(ctx, ir_inst, &self.flags)
    }

    fn lower_branch_group<C: LowerCtx<I = Inst>>(
        &self,
        ctx: &mut C,
        branches: &[IRInst],
        targets: &[MachLabel],
    ) -> CodegenResult<()> {
        lower_branch(ctx, branches, targets)
    }
}