1 //! This module defines aarch64-specific machine instruction types.
2 
3 // Some variants are not constructed, but we still want them as options in the future.
4 #![allow(dead_code)]
5 
6 use crate::binemit::CodeOffset;
7 use crate::ir::types::{
8     B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64,
9 };
10 use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type, ValueLabel};
11 use crate::isa::unwind::UnwindInst;
12 use crate::isa::CallConv;
13 use crate::machinst::*;
14 use crate::{settings, CodegenError, CodegenResult};
15 
16 use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
17 use regalloc::{RegUsageCollector, RegUsageMapper};
18 
19 use alloc::boxed::Box;
20 use alloc::vec::Vec;
21 use core::convert::TryFrom;
22 use smallvec::{smallvec, SmallVec};
23 use std::string::{String, ToString};
24 
25 pub mod regs;
26 pub use self::regs::*;
27 pub mod imms;
28 pub use self::imms::*;
29 pub mod args;
30 pub use self::args::*;
31 pub mod emit;
32 pub use self::emit::*;
33 use crate::isa::aarch64::abi::AArch64MachineDeps;
34 
35 pub mod unwind;
36 
37 #[cfg(test)]
38 mod emit_tests;
39 
40 //=============================================================================
41 // Instructions (top level): definition
42 
/// An ALU operation. This can be paired with several instruction formats
/// below (see `Inst`) in any combination.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum ALUOp {
    /// 32-bit add.
    Add32,
    /// 64-bit add.
    Add64,
    /// 32-bit subtract.
    Sub32,
    /// 64-bit subtract.
    Sub64,
    /// 32-bit bitwise inclusive OR.
    Orr32,
    /// 64-bit bitwise inclusive OR.
    Orr64,
    /// 32-bit OR with inverted second operand (AArch64 "ORN").
    OrrNot32,
    /// 64-bit OR with inverted second operand (AArch64 "ORN").
    OrrNot64,
    /// 32-bit bitwise AND.
    And32,
    /// 64-bit bitwise AND.
    And64,
    /// 32-bit bitwise AND, setting flags (AArch64 "ANDS").
    AndS32,
    /// 64-bit bitwise AND, setting flags (AArch64 "ANDS").
    AndS64,
    /// 32-bit AND with inverted second operand (AArch64 "BIC").
    AndNot32,
    /// 64-bit AND with inverted second operand (AArch64 "BIC").
    AndNot64,
    /// XOR (AArch64 calls this "EOR")
    Eor32,
    /// XOR (AArch64 calls this "EOR")
    Eor64,
    /// XNOR (AArch64 calls this "EOR-NOT")
    EorNot32,
    /// XNOR (AArch64 calls this "EOR-NOT")
    EorNot64,
    /// Add, setting flags
    AddS32,
    /// Add, setting flags
    AddS64,
    /// Sub, setting flags
    SubS32,
    /// Sub, setting flags
    SubS64,
    /// Signed multiply, high-word result
    SMulH,
    /// Unsigned multiply, high-word result
    UMulH,
    /// 64-bit signed divide.
    SDiv64,
    /// 64-bit unsigned divide.
    UDiv64,
    /// 32-bit rotate right.
    RotR32,
    /// 64-bit rotate right.
    RotR64,
    /// 32-bit logical (zero-filling) shift right.
    Lsr32,
    /// 64-bit logical (zero-filling) shift right.
    Lsr64,
    /// 32-bit arithmetic (sign-preserving) shift right.
    Asr32,
    /// 64-bit arithmetic (sign-preserving) shift right.
    Asr64,
    /// 32-bit shift left.
    Lsl32,
    /// 64-bit shift left.
    Lsl64,
    /// Add with carry, 32-bit
    Adc32,
    /// Add with carry, 64-bit
    Adc64,
    /// Add with carry, setting flags, 32-bit
    AdcS32,
    /// Add with carry, setting flags, 64-bit
    AdcS64,
    /// Subtract with carry, 32-bit
    Sbc32,
    /// Subtract with carry, 64-bit
    Sbc64,
    /// Subtract with carry, setting flags, 32-bit
    SbcS32,
    /// Subtract with carry, setting flags, 64-bit
    SbcS64,
}
104 
/// An ALU operation with three arguments.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum ALUOp3 {
    /// Multiply-add, 32-bit: `rd = ra + rn * rm`
    MAdd32,
    /// Multiply-add, 64-bit: `rd = ra + rn * rm`
    MAdd64,
    /// Multiply-sub, 32-bit: `rd = ra - rn * rm`
    MSub32,
    /// Multiply-sub, 64-bit: `rd = ra - rn * rm`
    MSub64,
}
117 
/// A floating-point unit (FPU) operation with one arg.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FPUOp1 {
    /// Absolute value, 32-bit.
    Abs32,
    /// Absolute value, 64-bit.
    Abs64,
    /// Negate, 32-bit.
    Neg32,
    /// Negate, 64-bit.
    Neg64,
    /// Square root, 32-bit.
    Sqrt32,
    /// Square root, 64-bit.
    Sqrt64,
    /// Convert a 32-bit FP value to a 64-bit FP value.
    Cvt32To64,
    /// Convert a 64-bit FP value to a 32-bit FP value.
    Cvt64To32,
}
130 
/// A floating-point unit (FPU) operation with two args.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FPUOp2 {
    /// Add, 32-bit.
    Add32,
    /// Add, 64-bit.
    Add64,
    /// Subtract, 32-bit.
    Sub32,
    /// Subtract, 64-bit.
    Sub64,
    /// Multiply, 32-bit.
    Mul32,
    /// Multiply, 64-bit.
    Mul64,
    /// Divide, 32-bit.
    Div32,
    /// Divide, 64-bit.
    Div64,
    /// Maximum, 32-bit.
    Max32,
    /// Maximum, 64-bit.
    Max64,
    /// Minimum, 32-bit.
    Min32,
    /// Minimum, 64-bit.
    Min64,
    /// Signed saturating add
    Sqadd64,
    /// Unsigned saturating add
    Uqadd64,
    /// Signed saturating subtract
    Sqsub64,
    /// Unsigned saturating subtract
    Uqsub64,
}
155 
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift, 32-bit. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift, 64-bit. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
    /// Shift left and insert, 32-bit. Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// Shift left and insert, 64-bit. Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}
168 
/// A floating-point unit (FPU) operation with three args.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FPUOp3 {
    /// Multiply-add, 32-bit.
    MAdd32,
    /// Multiply-add, 64-bit.
    MAdd64,
}
175 
/// A conversion from an FP to an integer value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FpuToIntOp {
    /// Convert 32-bit FP to 32-bit unsigned integer.
    F32ToU32,
    /// Convert 32-bit FP to 32-bit signed integer.
    F32ToI32,
    /// Convert 32-bit FP to 64-bit unsigned integer.
    F32ToU64,
    /// Convert 32-bit FP to 64-bit signed integer.
    F32ToI64,
    /// Convert 64-bit FP to 32-bit unsigned integer.
    F64ToU32,
    /// Convert 64-bit FP to 32-bit signed integer.
    F64ToI32,
    /// Convert 64-bit FP to 64-bit unsigned integer.
    F64ToU64,
    /// Convert 64-bit FP to 64-bit signed integer.
    F64ToI64,
}
188 
/// A conversion from an integer to an FP value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum IntToFpuOp {
    /// Convert 32-bit unsigned integer to 32-bit FP.
    U32ToF32,
    /// Convert 32-bit signed integer to 32-bit FP.
    I32ToF32,
    /// Convert 32-bit unsigned integer to 64-bit FP.
    U32ToF64,
    /// Convert 32-bit signed integer to 64-bit FP.
    I32ToF64,
    /// Convert 64-bit unsigned integer to 32-bit FP.
    U64ToF32,
    /// Convert 64-bit signed integer to 32-bit FP.
    I64ToF32,
    /// Convert 64-bit unsigned integer to 64-bit FP.
    U64ToF64,
    /// Convert 64-bit signed integer to 64-bit FP.
    I64ToF64,
}
201 
/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to
/// nearest, and for 32- or 64-bit FP values.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FpuRoundMode {
    /// Round toward minus infinity (floor), 32-bit.
    Minus32,
    /// Round toward minus infinity (floor), 64-bit.
    Minus64,
    /// Round toward plus infinity (ceil), 32-bit.
    Plus32,
    /// Round toward plus infinity (ceil), 64-bit.
    Plus64,
    /// Round toward zero (trunc), 32-bit.
    Zero32,
    /// Round toward zero (trunc), 64-bit.
    Zero64,
    /// Round to nearest, 32-bit.
    Nearest32,
    /// Round to nearest, 64-bit.
    Nearest64,
}
215 
/// Type of vector element extensions.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecExtendOp {
    /// Signed extension of 8-bit elements
    Sxtl8,
    /// Signed extension of 16-bit elements
    Sxtl16,
    /// Signed extension of 32-bit elements
    Sxtl32,
    /// Unsigned extension of 8-bit elements
    Uxtl8,
    /// Unsigned extension of 16-bit elements
    Uxtl16,
    /// Unsigned extension of 32-bit elements
    Uxtl32,
}
232 
/// A vector ALU operation.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecALUOp {
    /// Signed saturating add
    Sqadd,
    /// Unsigned saturating add
    Uqadd,
    /// Signed saturating subtract
    Sqsub,
    /// Unsigned saturating subtract
    Uqsub,
    /// Compare bitwise equal
    Cmeq,
    /// Compare signed greater than or equal
    Cmge,
    /// Compare signed greater than
    Cmgt,
    /// Compare unsigned higher or same
    Cmhs,
    /// Compare unsigned higher
    Cmhi,
    /// Floating-point compare equal
    Fcmeq,
    /// Floating-point compare greater than
    Fcmgt,
    /// Floating-point compare greater than or equal
    Fcmge,
    /// Bitwise and
    And,
    /// Bitwise bit clear
    Bic,
    /// Bitwise inclusive or
    Orr,
    /// Bitwise exclusive or
    Eor,
    /// Bitwise select
    Bsl,
    /// Unsigned maximum pairwise
    Umaxp,
    /// Add
    Add,
    /// Subtract
    Sub,
    /// Multiply
    Mul,
    /// Signed shift left
    Sshl,
    /// Unsigned shift left
    Ushl,
    /// Unsigned minimum
    Umin,
    /// Signed minimum
    Smin,
    /// Unsigned maximum
    Umax,
    /// Signed maximum
    Smax,
    /// Unsigned rounding halving add
    Urhadd,
    /// Floating-point add
    Fadd,
    /// Floating-point subtract
    Fsub,
    /// Floating-point divide
    Fdiv,
    /// Floating-point maximum
    Fmax,
    /// Floating-point minimum
    Fmin,
    /// Floating-point multiply
    Fmul,
    /// Add pairwise
    Addp,
    /// Zip vectors (primary) [meaning, low halves]
    Zip1,
    /// Signed saturating rounding doubling multiply returning high half
    Sqrdmulh,
}
311 
/// A Vector miscellaneous operation with two registers.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecMisc2 {
    /// Bitwise NOT
    Not,
    /// Negate
    Neg,
    /// Absolute value
    Abs,
    /// Floating-point absolute value
    Fabs,
    /// Floating-point negate
    Fneg,
    /// Floating-point square root
    Fsqrt,
    /// Reverse elements in 64-bit doublewords
    Rev64,
    /// Floating-point convert to signed integer, rounding toward zero
    Fcvtzs,
    /// Floating-point convert to unsigned integer, rounding toward zero
    Fcvtzu,
    /// Signed integer convert to floating-point
    Scvtf,
    /// Unsigned integer convert to floating-point
    Ucvtf,
    /// Floating point round to integral, rounding towards nearest
    Frintn,
    /// Floating point round to integral, rounding towards zero
    Frintz,
    /// Floating point round to integral, rounding towards minus infinity
    Frintm,
    /// Floating point round to integral, rounding towards plus infinity
    Frintp,
    /// Population count per byte (CNT)
    Cnt,
    /// Compare bitwise equal to 0
    Cmeq0,
}
350 
/// A vector widening operation with one argument.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRLongOp {
    /// Floating-point convert to higher precision long, 16-bit elements
    Fcvtl16,
    /// Floating-point convert to higher precision long, 32-bit elements
    Fcvtl32,
    /// Shift left long (by element size), 8-bit elements
    Shll8,
    /// Shift left long (by element size), 16-bit elements
    Shll16,
    /// Shift left long (by element size), 32-bit elements
    Shll32,
}
365 
/// A vector narrowing operation with one argument.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRNarrowOp {
    /// Extract narrow, 16-bit elements
    Xtn16,
    /// Extract narrow, 32-bit elements
    Xtn32,
    /// Extract narrow, 64-bit elements
    Xtn64,
    /// Signed saturating extract narrow, 16-bit elements
    Sqxtn16,
    /// Signed saturating extract narrow, 32-bit elements
    Sqxtn32,
    /// Signed saturating extract narrow, 64-bit elements
    Sqxtn64,
    /// Signed saturating extract unsigned narrow, 16-bit elements
    Sqxtun16,
    /// Signed saturating extract unsigned narrow, 32-bit elements
    Sqxtun32,
    /// Signed saturating extract unsigned narrow, 64-bit elements
    Sqxtun64,
    /// Unsigned saturating extract narrow, 16-bit elements
    Uqxtn16,
    /// Unsigned saturating extract narrow, 32-bit elements
    Uqxtn32,
    /// Unsigned saturating extract narrow, 64-bit elements
    Uqxtn64,
    /// Floating-point convert to lower precision narrow, 32-bit elements
    Fcvtn32,
    /// Floating-point convert to lower precision narrow, 64-bit elements
    Fcvtn64,
}
398 
/// A vector widening multiply operation with two register sources.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRRLongOp {
    /// Signed multiply long, 8-bit elements.
    Smull8,
    /// Signed multiply long, 16-bit elements.
    Smull16,
    /// Signed multiply long, 32-bit elements.
    Smull32,
    /// Unsigned multiply long, 8-bit elements.
    Umull8,
    /// Unsigned multiply long, 16-bit elements.
    Umull16,
    /// Unsigned multiply long, 32-bit elements.
    Umull32,
    /// Unsigned multiply add long, 8-bit elements.
    Umlal8,
    /// Unsigned multiply add long, 16-bit elements.
    Umlal16,
    /// Unsigned multiply add long, 32-bit elements.
    Umlal32,
}
414 
/// A vector operation on a pair of elements with one register.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecPairOp {
    /// Add pair of elements
    Addp,
}
421 
/// 1-operand vector instruction that extends elements of the input register
/// and operates on a pair of elements.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRPairLongOp {
    /// Sign extend and add pair of elements, 8-bit elements
    Saddlp8,
    /// Sign extend and add pair of elements, 16-bit elements
    Saddlp16,
    /// Unsigned extend and add pair of elements, 8-bit elements
    Uaddlp8,
    /// Unsigned extend and add pair of elements, 16-bit elements
    Uaddlp16,
}
433 
/// An operation across the lanes of vectors.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecLanesOp {
    /// Integer addition across a vector
    Addv,
    /// Unsigned minimum across a vector
    Uminv,
}
442 
/// A shift-by-immediate operation on each lane of a vector.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecShiftImmOp {
    /// Shift left
    Shl,
    /// Unsigned shift right
    Ushr,
    /// Signed shift right
    Sshr,
}
453 
/// An operation on the bits of a register. This can be paired with several instruction formats
/// below (see `Inst`) in any combination.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum BitOp {
    /// Bit reverse, 32-bit
    RBit32,
    /// Bit reverse, 64-bit
    RBit64,
    /// Count leading zeros, 32-bit
    Clz32,
    /// Count leading zeros, 64-bit
    Clz64,
    /// Count leading sign bits, 32-bit
    Cls32,
    /// Count leading sign bits, 64-bit
    Cls64,
}
467 
468 impl BitOp {
469     /// What is the opcode's native width?
operand_size(&self) -> OperandSize470     pub fn operand_size(&self) -> OperandSize {
471         match self {
472             BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32,
473             _ => OperandSize::Size64,
474         }
475     }
476 
477     /// Get the assembly mnemonic for this opcode.
op_str(&self) -> &'static str478     pub fn op_str(&self) -> &'static str {
479         match self {
480             BitOp::RBit32 | BitOp::RBit64 => "rbit",
481             BitOp::Clz32 | BitOp::Clz64 => "clz",
482             BitOp::Cls32 | BitOp::Cls64 => "cls",
483         }
484     }
485 }
486 
487 impl From<(Opcode, Type)> for BitOp {
488     /// Get the BitOp from the IR opcode.
from(op_ty: (Opcode, Type)) -> BitOp489     fn from(op_ty: (Opcode, Type)) -> BitOp {
490         match op_ty {
491             (Opcode::Bitrev, I32) => BitOp::RBit32,
492             (Opcode::Bitrev, I64) => BitOp::RBit64,
493             (Opcode::Clz, I32) => BitOp::Clz32,
494             (Opcode::Clz, I64) => BitOp::Clz64,
495             (Opcode::Cls, I32) => BitOp::Cls32,
496             (Opcode::Cls, I64) => BitOp::Cls64,
497             _ => unreachable!("Called with non-bit op!: {:?}", op_ty),
498         }
499     }
500 }
501 
/// Additional information for (direct) Call instructions, left out of line to lower the size of
/// the Inst enum.
#[derive(Clone, Debug)]
pub struct CallInfo {
    /// The symbolic name of the call target.
    pub dest: ExternalName,
    /// Registers read (used) by the call, e.g. argument registers.
    pub uses: Vec<Reg>,
    /// Registers written (defined) by the call, e.g. return-value and clobbered registers.
    pub defs: Vec<Writable<Reg>>,
    /// The IR opcode this call was lowered from.
    pub opcode: Opcode,
    /// Calling convention of the calling function.
    pub caller_callconv: CallConv,
    /// Calling convention of the called function.
    pub callee_callconv: CallConv,
}
513 
/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
/// enum.
#[derive(Clone, Debug)]
pub struct CallIndInfo {
    /// Register holding the address of the call target.
    pub rn: Reg,
    /// Registers read (used) by the call, e.g. argument registers.
    pub uses: Vec<Reg>,
    /// Registers written (defined) by the call, e.g. return-value and clobbered registers.
    pub defs: Vec<Writable<Reg>>,
    /// The IR opcode this call was lowered from.
    pub opcode: Opcode,
    /// Calling convention of the calling function.
    pub caller_callconv: CallConv,
    /// Calling convention of the called function.
    pub callee_callconv: CallConv,
}
525 
/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
/// enum.
#[derive(Clone, Debug)]
pub struct JTSequenceInfo {
    /// Branch target for each jump-table entry.
    pub targets: Vec<BranchTarget>,
    /// Branch target taken when the index is out of range.
    pub default_target: BranchTarget,
    /// All labels this sequence can branch to, as machine labels.
    pub targets_for_term: Vec<MachLabel>, // needed for MachTerminator.
}
534 
535 /// Instruction formats.
536 #[derive(Clone, Debug)]
537 pub enum Inst {
538     /// A no-op of zero size.
539     Nop0,
540 
541     /// A no-op that is one instruction large.
542     Nop4,
543 
544     /// An ALU operation with two register sources and a register destination.
545     AluRRR {
546         alu_op: ALUOp,
547         rd: Writable<Reg>,
548         rn: Reg,
549         rm: Reg,
550     },
551     /// An ALU operation with three register sources and a register destination.
552     AluRRRR {
553         alu_op: ALUOp3,
554         rd: Writable<Reg>,
555         rn: Reg,
556         rm: Reg,
557         ra: Reg,
558     },
559     /// An ALU operation with a register source and an immediate-12 source, and a register
560     /// destination.
561     AluRRImm12 {
562         alu_op: ALUOp,
563         rd: Writable<Reg>,
564         rn: Reg,
565         imm12: Imm12,
566     },
567     /// An ALU operation with a register source and an immediate-logic source, and a register destination.
568     AluRRImmLogic {
569         alu_op: ALUOp,
570         rd: Writable<Reg>,
571         rn: Reg,
572         imml: ImmLogic,
573     },
574     /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination.
575     AluRRImmShift {
576         alu_op: ALUOp,
577         rd: Writable<Reg>,
578         rn: Reg,
579         immshift: ImmShift,
580     },
581     /// An ALU operation with two register sources, one of which can be shifted, and a register
582     /// destination.
583     AluRRRShift {
584         alu_op: ALUOp,
585         rd: Writable<Reg>,
586         rn: Reg,
587         rm: Reg,
588         shiftop: ShiftOpAndAmt,
589     },
590     /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and
591     /// shifted, and a register destination.
592     AluRRRExtend {
593         alu_op: ALUOp,
594         rd: Writable<Reg>,
595         rn: Reg,
596         rm: Reg,
597         extendop: ExtendOp,
598     },
599 
600     /// A bit op instruction with a single register source.
601     BitRR {
602         op: BitOp,
603         rd: Writable<Reg>,
604         rn: Reg,
605     },
606 
607     /// An unsigned (zero-extending) 8-bit load.
608     ULoad8 {
609         rd: Writable<Reg>,
610         mem: AMode,
611         flags: MemFlags,
612     },
613     /// A signed (sign-extending) 8-bit load.
614     SLoad8 {
615         rd: Writable<Reg>,
616         mem: AMode,
617         flags: MemFlags,
618     },
619     /// An unsigned (zero-extending) 16-bit load.
620     ULoad16 {
621         rd: Writable<Reg>,
622         mem: AMode,
623         flags: MemFlags,
624     },
625     /// A signed (sign-extending) 16-bit load.
626     SLoad16 {
627         rd: Writable<Reg>,
628         mem: AMode,
629         flags: MemFlags,
630     },
631     /// An unsigned (zero-extending) 32-bit load.
632     ULoad32 {
633         rd: Writable<Reg>,
634         mem: AMode,
635         flags: MemFlags,
636     },
637     /// A signed (sign-extending) 32-bit load.
638     SLoad32 {
639         rd: Writable<Reg>,
640         mem: AMode,
641         flags: MemFlags,
642     },
643     /// A 64-bit load.
644     ULoad64 {
645         rd: Writable<Reg>,
646         mem: AMode,
647         flags: MemFlags,
648     },
649 
650     /// An 8-bit store.
651     Store8 {
652         rd: Reg,
653         mem: AMode,
654         flags: MemFlags,
655     },
656     /// A 16-bit store.
657     Store16 {
658         rd: Reg,
659         mem: AMode,
660         flags: MemFlags,
661     },
662     /// A 32-bit store.
663     Store32 {
664         rd: Reg,
665         mem: AMode,
666         flags: MemFlags,
667     },
668     /// A 64-bit store.
669     Store64 {
670         rd: Reg,
671         mem: AMode,
672         flags: MemFlags,
673     },
674 
675     /// A store of a pair of registers.
676     StoreP64 {
677         rt: Reg,
678         rt2: Reg,
679         mem: PairAMode,
680         flags: MemFlags,
681     },
682     /// A load of a pair of registers.
683     LoadP64 {
684         rt: Writable<Reg>,
685         rt2: Writable<Reg>,
686         mem: PairAMode,
687         flags: MemFlags,
688     },
689 
690     /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we
691     /// keep them separate at the `Inst` level for better pretty-printing
692     /// and faster `is_move()` logic.
693     Mov64 {
694         rd: Writable<Reg>,
695         rm: Reg,
696     },
697 
698     /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is
699     /// effectively an alias for an unsigned 32-to-64-bit extension.
700     Mov32 {
701         rd: Writable<Reg>,
702         rm: Reg,
703     },
704 
705     /// A MOVZ with a 16-bit immediate.
706     MovZ {
707         rd: Writable<Reg>,
708         imm: MoveWideConst,
709         size: OperandSize,
710     },
711 
712     /// A MOVN with a 16-bit immediate.
713     MovN {
714         rd: Writable<Reg>,
715         imm: MoveWideConst,
716         size: OperandSize,
717     },
718 
719     /// A MOVK with a 16-bit immediate.
720     MovK {
721         rd: Writable<Reg>,
722         imm: MoveWideConst,
723         size: OperandSize,
724     },
725 
726     /// A sign- or zero-extend operation.
727     Extend {
728         rd: Writable<Reg>,
729         rn: Reg,
730         signed: bool,
731         from_bits: u8,
732         to_bits: u8,
733     },
734 
735     /// A conditional-select operation.
736     CSel {
737         rd: Writable<Reg>,
738         cond: Cond,
739         rn: Reg,
740         rm: Reg,
741     },
742 
743     /// A conditional-set operation.
744     CSet {
745         rd: Writable<Reg>,
746         cond: Cond,
747     },
748 
749     /// A conditional-set-mask operation.
750     CSetm {
751         rd: Writable<Reg>,
752         cond: Cond,
753     },
754 
755     /// A conditional comparison with an immediate.
756     CCmpImm {
757         size: OperandSize,
758         rn: Reg,
759         imm: UImm5,
760         nzcv: NZCV,
761         cond: Cond,
762     },
763 
764     /// A synthetic insn, which is a load-linked store-conditional loop, that has the overall
765     /// effect of atomically modifying a memory location in a particular way.  Because we have
766     /// no way to explain to the regalloc about earlyclobber registers, this instruction has
767     /// completely fixed operand registers, and we rely on the RA's coalescing to remove copies
768     /// in the surrounding code to the extent it can.  The sequence is both preceded and
769     /// followed by a fence which is at least as comprehensive as that of the `Fence`
770     /// instruction below.  This instruction is sequentially consistent.  The operand
771     /// conventions are:
772     ///
773     /// x25   (rd) address
774     /// x26   (rd) second operand for `op`
775     /// x27   (wr) old value
776     /// x24   (wr) scratch reg; value afterwards has no meaning
777     /// x28   (wr) scratch reg; value afterwards has no meaning
778     AtomicRMW {
779         ty: Type, // I8, I16, I32 or I64
780         op: inst_common::AtomicRmwOp,
781     },
782 
783     /// An atomic compare-and-swap operation. This instruction is sequentially consistent.
784     AtomicCAS {
785         rs: Writable<Reg>,
786         rt: Reg,
787         rn: Reg,
788         ty: Type,
789     },
790 
791     /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
792     /// store-conditional loop.
793     /// This instruction is sequentially consistent.
794     /// Note that the operand conventions, although very similar to AtomicRMW, are different:
795     ///
796     /// x25   (rd) address
797     /// x26   (rd) expected value
798     /// x28   (rd) replacement value
799     /// x27   (wr) old value
800     /// x24   (wr) scratch reg; value afterwards has no meaning
801     AtomicCASLoop {
802         ty: Type, // I8, I16, I32 or I64
803     },
804 
805     /// Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put
806     /// it in `rn`, optionally zero-extending to fill a word or double word result.
807     /// This instruction is sequentially consistent.
808     LoadAcquire {
809         access_ty: Type, // I8, I16, I32 or I64
810         rt: Writable<Reg>,
811         rn: Reg,
812     },
813 
814     /// Write the lowest `ty` bits of `rt` to address `rn`.
815     /// This instruction is sequentially consistent.
816     StoreRelease {
817         access_ty: Type, // I8, I16, I32 or I64
818         rt: Reg,
819         rn: Reg,
820     },
821 
822     /// A memory fence.  This must provide ordering to ensure that, at a minimum, neither loads
823     /// nor stores may move forwards or backwards across the fence.  Currently emitted as "dmb
824     /// ish".  This instruction is sequentially consistent.
825     Fence,
826 
827     /// FPU move. Note that this is distinct from a vector-register
828     /// move; moving just 64 bits seems to be significantly faster.
829     FpuMove64 {
830         rd: Writable<Reg>,
831         rn: Reg,
832     },
833 
834     /// Vector register move.
835     FpuMove128 {
836         rd: Writable<Reg>,
837         rn: Reg,
838     },
839 
840     /// Move to scalar from a vector element.
841     FpuMoveFromVec {
842         rd: Writable<Reg>,
843         rn: Reg,
844         idx: u8,
845         size: VectorSize,
846     },
847 
848     /// Zero-extend a SIMD & FP scalar to the full width of a vector register.
849     FpuExtend {
850         rd: Writable<Reg>,
851         rn: Reg,
852         size: ScalarSize,
853     },
854 
855     /// 1-op FPU instruction.
856     FpuRR {
857         fpu_op: FPUOp1,
858         rd: Writable<Reg>,
859         rn: Reg,
860     },
861 
862     /// 2-op FPU instruction.
863     FpuRRR {
864         fpu_op: FPUOp2,
865         rd: Writable<Reg>,
866         rn: Reg,
867         rm: Reg,
868     },
869 
870     FpuRRI {
871         fpu_op: FPUOpRI,
872         rd: Writable<Reg>,
873         rn: Reg,
874     },
875 
876     /// 3-op FPU instruction.
877     FpuRRRR {
878         fpu_op: FPUOp3,
879         rd: Writable<Reg>,
880         rn: Reg,
881         rm: Reg,
882         ra: Reg,
883     },
884 
885     /// FPU comparison, single-precision (32 bit).
886     FpuCmp32 {
887         rn: Reg,
888         rm: Reg,
889     },
890 
891     /// FPU comparison, double-precision (64 bit).
892     FpuCmp64 {
893         rn: Reg,
894         rm: Reg,
895     },
896 
897     /// Floating-point load, single-precision (32 bit).
898     FpuLoad32 {
899         rd: Writable<Reg>,
900         mem: AMode,
901         flags: MemFlags,
902     },
903     /// Floating-point store, single-precision (32 bit).
904     FpuStore32 {
905         rd: Reg,
906         mem: AMode,
907         flags: MemFlags,
908     },
909     /// Floating-point load, double-precision (64 bit).
910     FpuLoad64 {
911         rd: Writable<Reg>,
912         mem: AMode,
913         flags: MemFlags,
914     },
915     /// Floating-point store, double-precision (64 bit).
916     FpuStore64 {
917         rd: Reg,
918         mem: AMode,
919         flags: MemFlags,
920     },
921     /// Floating-point/vector load, 128 bit.
922     FpuLoad128 {
923         rd: Writable<Reg>,
924         mem: AMode,
925         flags: MemFlags,
926     },
927     /// Floating-point/vector store, 128 bit.
928     FpuStore128 {
929         rd: Reg,
930         mem: AMode,
931         flags: MemFlags,
932     },
933     /// A load of a pair of floating-point registers, double precision (64-bit).
934     FpuLoadP64 {
935         rt: Writable<Reg>,
936         rt2: Writable<Reg>,
937         mem: PairAMode,
938         flags: MemFlags,
939     },
940     /// A store of a pair of floating-point registers, double precision (64-bit).
941     FpuStoreP64 {
942         rt: Reg,
943         rt2: Reg,
944         mem: PairAMode,
945         flags: MemFlags,
946     },
947     /// A load of a pair of floating-point registers, 128-bit.
948     FpuLoadP128 {
949         rt: Writable<Reg>,
950         rt2: Writable<Reg>,
951         mem: PairAMode,
952         flags: MemFlags,
953     },
954     /// A store of a pair of floating-point registers, 128-bit.
955     FpuStoreP128 {
956         rt: Reg,
957         rt2: Reg,
958         mem: PairAMode,
959         flags: MemFlags,
960     },
961     LoadFpuConst64 {
962         rd: Writable<Reg>,
963         const_data: u64,
964     },
965 
966     LoadFpuConst128 {
967         rd: Writable<Reg>,
968         const_data: u128,
969     },
970 
971     /// Conversion: FP -> integer.
972     FpuToInt {
973         op: FpuToIntOp,
974         rd: Writable<Reg>,
975         rn: Reg,
976     },
977 
978     /// Conversion: integer -> FP.
979     IntToFpu {
980         op: IntToFpuOp,
981         rd: Writable<Reg>,
982         rn: Reg,
983     },
984 
985     /// FP conditional select, 32 bit.
986     FpuCSel32 {
987         rd: Writable<Reg>,
988         rn: Reg,
989         rm: Reg,
990         cond: Cond,
991     },
992     /// FP conditional select, 64 bit.
993     FpuCSel64 {
994         rd: Writable<Reg>,
995         rn: Reg,
996         rm: Reg,
997         cond: Cond,
998     },
999 
1000     /// Round to integer.
1001     FpuRound {
1002         op: FpuRoundMode,
1003         rd: Writable<Reg>,
1004         rn: Reg,
1005     },
1006 
1007     /// Move from a GPR to a vector register.  The scalar value is parked in the lowest lane
1008     /// of the destination, and all other lanes are zeroed out.  Currently only 32- and 64-bit
1009     /// transactions are supported.
1010     MovToFpu {
1011         rd: Writable<Reg>,
1012         rn: Reg,
1013         size: ScalarSize,
1014     },
1015 
1016     /// Move to a vector element from a GPR.
1017     MovToVec {
1018         rd: Writable<Reg>,
1019         rn: Reg,
1020         idx: u8,
1021         size: VectorSize,
1022     },
1023 
1024     /// Unsigned move from a vector element to a GPR.
1025     MovFromVec {
1026         rd: Writable<Reg>,
1027         rn: Reg,
1028         idx: u8,
1029         size: VectorSize,
1030     },
1031 
1032     /// Signed move from a vector element to a GPR.
1033     MovFromVecSigned {
1034         rd: Writable<Reg>,
1035         rn: Reg,
1036         idx: u8,
1037         size: VectorSize,
1038         scalar_size: OperandSize,
1039     },
1040 
1041     /// Duplicate general-purpose register to vector.
1042     VecDup {
1043         rd: Writable<Reg>,
1044         rn: Reg,
1045         size: VectorSize,
1046     },
1047 
1048     /// Duplicate scalar to vector.
1049     VecDupFromFpu {
1050         rd: Writable<Reg>,
1051         rn: Reg,
1052         size: VectorSize,
1053     },
1054 
1055     /// Duplicate FP immediate to vector.
1056     VecDupFPImm {
1057         rd: Writable<Reg>,
1058         imm: ASIMDFPModImm,
1059         size: VectorSize,
1060     },
1061 
1062     /// Duplicate immediate to vector.
1063     VecDupImm {
1064         rd: Writable<Reg>,
1065         imm: ASIMDMovModImm,
1066         invert: bool,
1067         size: VectorSize,
1068     },
1069 
1070     /// Vector extend.
1071     VecExtend {
1072         t: VecExtendOp,
1073         rd: Writable<Reg>,
1074         rn: Reg,
1075         high_half: bool,
1076     },
1077 
1078     /// Move vector element to another vector element.
1079     VecMovElement {
1080         rd: Writable<Reg>,
1081         rn: Reg,
1082         dest_idx: u8,
1083         src_idx: u8,
1084         size: VectorSize,
1085     },
1086 
1087     /// Vector widening operation.
1088     VecRRLong {
1089         op: VecRRLongOp,
1090         rd: Writable<Reg>,
1091         rn: Reg,
1092         high_half: bool,
1093     },
1094 
1095     /// Vector narrowing operation.
1096     VecRRNarrow {
1097         op: VecRRNarrowOp,
1098         rd: Writable<Reg>,
1099         rn: Reg,
1100         high_half: bool,
1101     },
1102 
1103     /// 1-operand vector instruction that operates on a pair of elements.
1104     VecRRPair {
1105         op: VecPairOp,
1106         rd: Writable<Reg>,
1107         rn: Reg,
1108     },
1109 
1110     /// 2-operand vector instruction that produces a result with twice the
1111     /// lane width and half the number of lanes.
1112     VecRRRLong {
1113         alu_op: VecRRRLongOp,
1114         rd: Writable<Reg>,
1115         rn: Reg,
1116         rm: Reg,
1117         high_half: bool,
1118     },
1119 
1120     /// 1-operand vector instruction that extends elements of the input
1121     /// register and operates on a pair of elements. The output lane width
1122     /// is double that of the input.
1123     VecRRPairLong {
1124         op: VecRRPairLongOp,
1125         rd: Writable<Reg>,
1126         rn: Reg,
1127     },
1128 
1129     /// A vector ALU op.
1130     VecRRR {
1131         alu_op: VecALUOp,
1132         rd: Writable<Reg>,
1133         rn: Reg,
1134         rm: Reg,
1135         size: VectorSize,
1136     },
1137 
1138     /// Vector two register miscellaneous instruction.
1139     VecMisc {
1140         op: VecMisc2,
1141         rd: Writable<Reg>,
1142         rn: Reg,
1143         size: VectorSize,
1144     },
1145 
1146     /// Vector instruction across lanes.
1147     VecLanes {
1148         op: VecLanesOp,
1149         rd: Writable<Reg>,
1150         rn: Reg,
1151         size: VectorSize,
1152     },
1153 
1154     /// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate),
1155     /// Signed Shift Right (immediate).  These are somewhat unusual in that, for right shifts,
1156     /// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive.  A zero
1157     /// right-shift cannot be encoded.  Left shifts are "normal", though, having valid `imm`
1158     /// values from 0 to lane-size-in-bits - 1 inclusive.
1159     VecShiftImm {
1160         op: VecShiftImmOp,
1161         rd: Writable<Reg>,
1162         rn: Reg,
1163         size: VectorSize,
1164         imm: u8,
1165     },
1166 
1167     /// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
1168     /// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
1169     VecExtract {
1170         rd: Writable<Reg>,
1171         rn: Reg,
1172         rm: Reg,
1173         imm4: u8,
1174     },
1175 
1176     /// Table vector lookup - single register table. The table consists of 8-bit elements and is
1177     /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
1178     /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
1179     /// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
1180     /// to 0.
1181     VecTbl {
1182         rd: Writable<Reg>,
1183         rn: Reg,
1184         rm: Reg,
1185         is_extension: bool,
1186     },
1187 
1188     /// Table vector lookup - two register table. The table consists of 8-bit elements and is
1189     /// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
1190     /// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
1191     /// the destination vector that correspond to out-of-range indices (greater than 31) unmodified
1192     /// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
1193     /// modulo 32, that is v31 and v0 (in that order) are consecutive registers.
1194     VecTbl2 {
1195         rd: Writable<Reg>,
1196         rn: Reg,
1197         rn2: Reg,
1198         rm: Reg,
1199         is_extension: bool,
1200     },
1201 
1202     /// Load an element and replicate to all lanes of a vector.
1203     VecLoadReplicate {
1204         rd: Writable<Reg>,
1205         rn: Reg,
1206         size: VectorSize,
1207     },
1208 
1209     /// Vector conditional select, 128 bit.  A synthetic instruction, which generates a 4-insn
1210     /// control-flow diamond.
1211     VecCSel {
1212         rd: Writable<Reg>,
1213         rn: Reg,
1214         rm: Reg,
1215         cond: Cond,
1216     },
1217 
1218     /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
1219     MovToNZCV {
1220         rn: Reg,
1221     },
1222 
1223     /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn).
1224     MovFromNZCV {
1225         rd: Writable<Reg>,
1226     },
1227 
1228     /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation
1229     /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the
1230     /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
1231     /// target.
1232     Call {
1233         info: Box<CallInfo>,
1234     },
1235     /// A machine indirect-call instruction.
1236     CallInd {
1237         info: Box<CallIndInfo>,
1238     },
1239 
1240     // ---- branches (exactly one must appear at end of BB) ----
1241     /// A machine return instruction.
1242     Ret,
1243 
1244     /// A placeholder instruction, generating no code, meaning that a function epilogue must be
1245     /// inserted there.
1246     EpiloguePlaceholder,
1247 
1248     /// An unconditional branch.
1249     Jump {
1250         dest: BranchTarget,
1251     },
1252 
1253     /// A conditional branch. Contains two targets; at emission time, both are emitted, but
1254     /// the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the
1255     /// choice of taken/not_taken (inverting the branch polarity as needed) based on the
1256     /// fallthrough at the time of lowering.
1257     CondBr {
1258         taken: BranchTarget,
1259         not_taken: BranchTarget,
1260         kind: CondBrKind,
1261     },
1262 
1263     /// A conditional trap: execute a `udf` if the condition is true. This is
1264     /// one VCode instruction because it uses embedded control flow; it is
1265     /// logically a single-in, single-out region, but needs to appear as one
1266     /// unit to the register allocator.
1267     ///
1268     /// The `CondBrKind` gives the conditional-branch condition that will
1269     /// *execute* the embedded `Inst`. (In the emitted code, we use the inverse
1270     /// of this condition in a branch that skips the trap instruction.)
1271     TrapIf {
1272         kind: CondBrKind,
1273         trap_code: TrapCode,
1274     },
1275 
1276     /// An indirect branch through a register, augmented with set of all
1277     /// possible successors.
1278     IndirectBr {
1279         rn: Reg,
1280         targets: Vec<MachLabel>,
1281     },
1282 
1283     /// A "break" instruction, used for e.g. traps and debug breakpoints.
1284     Brk,
1285 
1286     /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
1287     /// runtime.
1288     Udf {
1289         trap_code: TrapCode,
1290     },
1291 
1292     /// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
1293     /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
1294     /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
1295     /// need full `MemLabel` support.
1296     Adr {
1297         rd: Writable<Reg>,
1298         /// Offset in range -2^20 .. 2^20.
1299         off: i32,
1300     },
1301 
1302     /// Raw 32-bit word, used for inline constants and jump-table entries.
1303     Word4 {
1304         data: u32,
1305     },
1306 
1307     /// Raw 64-bit word, used for inline constants.
1308     Word8 {
1309         data: u64,
1310     },
1311 
1312     /// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale).
1313     JTSequence {
1314         info: Box<JTSequenceInfo>,
1315         ridx: Reg,
1316         rtmp1: Writable<Reg>,
1317         rtmp2: Writable<Reg>,
1318     },
1319 
1320     /// Load an inline symbol reference.
1321     LoadExtName {
1322         rd: Writable<Reg>,
1323         name: Box<ExternalName>,
1324         offset: i64,
1325     },
1326 
1327     /// Load address referenced by `mem` into `rd`.
1328     LoadAddr {
1329         rd: Writable<Reg>,
1330         mem: AMode,
1331     },
1332 
1333     /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
1334     /// controls how AMode::NominalSPOffset args are lowered.
1335     VirtualSPOffsetAdj {
1336         offset: i64,
1337     },
1338 
1339     /// Meta-insn, no-op in generated code: emit constant/branch veneer island
1340     /// at this point (with a guard jump around it) if less than the needed
1341     /// space is available before the next branch deadline. See the `MachBuffer`
1342     /// implementation in `machinst/buffer.rs` for the overall algorithm. In
1343     /// brief, we retain a set of "pending/unresolved label references" from
1344     /// branches as we scan forward through instructions to emit machine code;
1345     /// if we notice we're about to go out of range on an unresolved reference,
1346     /// we stop, emit a bunch of "veneers" (branches in a form that has a longer
1347     /// range, e.g. a 26-bit-offset unconditional jump), and point the original
1348     /// label references to those. This is an "island" because it comes in the
1349     /// middle of the code.
1350     ///
1351     /// This meta-instruction is a necessary part of the logic that determines
1352     /// where to place islands. Ordinarily, we want to place them between basic
1353     /// blocks, so we compute the worst-case size of each block, and emit the
1354     /// island before starting a block if we would exceed a deadline before the
1355     /// end of the block. However, some sequences (such as an inline jumptable)
1356     /// are variable-length and not accounted for by this logic; so these
1357     /// lowered sequences include an `EmitIsland` to trigger island generation
1358     /// where necessary.
1359     EmitIsland {
1360         /// The needed space before the next deadline.
1361         needed_space: CodeOffset,
1362     },
1363 
1364     /// A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol in x0.
1365     ElfTlsGetAddr {
1366         symbol: ExternalName,
1367     },
1368 
1369     /// A definition of a value label.
1370     ValueLabelMarker {
1371         reg: Reg,
1372         label: ValueLabel,
1373     },
1374 
1375     /// An unwind pseudo-instruction.
1376     Unwind {
1377         inst: UnwindInst,
1378     },
1379 }
1380 
/// Count how many of the low `num_half_words` 16-bit chunks of `value` are
/// zero. Used by `Inst::load_constant` to decide whether a MOVZ- or
/// MOVN-first sequence needs fewer instructions.
fn count_zero_half_words(value: u64, num_half_words: u8) -> usize {
    (0..num_half_words)
        .filter(|&i| (value >> (i * 16)) & 0xffff == 0)
        .count()
}
1392 
#[test]
fn inst_size_test() {
    // Guard against unintentionally growing the `Inst` enum: a larger
    // variant payload increases the size of *every* instruction stored
    // in a VCode buffer.
    let size = std::mem::size_of::<Inst>();
    assert_eq!(size, 32);
}
1399 
impl Inst {
    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
    /// logical immediate, or constant pool).
    ///
    /// Strategy, in order of preference:
    /// 1. A single MOVZ if the value is one shifted 16-bit immediate.
    /// 2. A single MOVN if the *inverted* value is one shifted 16-bit immediate.
    /// 3. A single ORR-immediate from the zero register, if the value is encodable
    ///    as a logical immediate.
    /// 4. Otherwise, a MOVZ/MOVN followed by up to three MOVKs, skipping half-words
    ///    that the first instruction already sets.
    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
            smallvec![Inst::MovZ {
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
            smallvec![Inst::MovN {
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
            // Weird logical-instruction immediate in ORI using zero register
            smallvec![Inst::AluRRImmLogic {
                alu_op: ALUOp::Orr64,
                rd,
                rn: zero_reg(),
                imml,
            }]
        } else {
            let mut insts = smallvec![];

            // If the top 32 bits are zero, use 32-bit `mov` operations.
            // `negated` is the inversion of the value restricted to the half-words
            // we will actually emit (lower 32 bits only in the 32-bit case).
            let (num_half_words, size, negated) = if value >> 32 == 0 {
                (2, OperandSize::Size32, (!value << 32) >> 32)
            } else {
                (4, OperandSize::Size64, !value)
            };
            // If the number of 0xffff half words is greater than the number of 0x0000 half words
            // it is more efficient to use `movn` for the first instruction.
            let first_is_inverted = count_zero_half_words(negated, num_half_words)
                > count_zero_half_words(value, num_half_words);
            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
            // instruction used.
            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
            let mut first_mov_emitted = false;

            for i in 0..num_half_words {
                let imm16 = (value >> (16 * i)) & 0xffff;
                if imm16 != ignored_halfword {
                    if !first_mov_emitted {
                        first_mov_emitted = true;
                        if first_is_inverted {
                            // MOVN writes the bitwise NOT of its (shifted) immediate,
                            // so invert the half-word before encoding it.
                            let imm =
                                MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
                                    .unwrap();
                            insts.push(Inst::MovN { rd, imm, size });
                        } else {
                            let imm =
                                MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
                            insts.push(Inst::MovZ { rd, imm, size });
                        }
                    } else {
                        // Subsequent half-words are patched in with MOVK, which
                        // leaves the other half-words untouched.
                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
                        insts.push(Inst::MovK { rd, imm, size });
                    }
                }
            }

            // At least one instruction must have been emitted: a value whose every
            // half-word equals `ignored_halfword` would have been caught by the
            // single-MOVZ or single-MOVN cases above.
            assert!(first_mov_emitted);

            insts
        }
    }

    /// Create instructions that load a 128-bit constant.
    ///
    /// The constant is split into two independent 64-bit halves, loaded into
    /// the two registers of `to_regs` (low half first).
    pub fn load_constant128(to_regs: ValueRegs<Writable<Reg>>, value: u128) -> SmallVec<[Inst; 4]> {
        assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");

        let lower = value as u64;
        let upper = (value >> 64) as u64;

        let lower_reg = to_regs.regs()[0];
        let upper_reg = to_regs.regs()[1];

        let mut load_ins = Inst::load_constant(lower_reg, lower);
        let load_upper = Inst::load_constant(upper_reg, upper);

        load_ins.extend(load_upper.into_iter());
        load_ins
    }

    /// Create instructions that load a 32-bit floating-point constant.
    ///
    /// `alloc_tmp` supplies a scratch integer register when the constant cannot
    /// be materialized directly into the FP/vector register file.
    pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        value: u32,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        // Note that we must make sure that all bits outside the lowest 32 are set to 0
        // because this function is also used to load wider constants (that have zeros
        // in their most significant bits).
        if value == 0 {
            smallvec![Inst::VecDupImm {
                rd,
                imm: ASIMDMovModImm::zero(ScalarSize::Size32),
                invert: false,
                size: VectorSize::Size32x2
            }]
        } else {
            // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
            // bits.
            let tmp = alloc_tmp(I32);
            let mut insts = Inst::load_constant(tmp, value as u64);

            // Move through the integer register; the 64-bit move zeroes the
            // upper bits of the destination vector register.
            insts.push(Inst::MovToFpu {
                rd,
                rn: tmp.to_reg(),
                size: ScalarSize::Size64,
            });

            insts
        }
    }

    /// Create instructions that load a 64-bit floating-point constant.
    ///
    /// Falls back to a literal-pool load (`LoadFpuConst64`) when no cheaper
    /// sequence applies.
    pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        const_data: u64,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        // Note that we must make sure that all bits outside the lowest 64 are set to 0
        // because this function is also used to load wider constants (that have zeros
        // in their most significant bits).
        if let Ok(const_data) = u32::try_from(const_data) {
            Inst::load_fp_constant32(rd, const_data, alloc_tmp)
        // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
        // bits.  Also, treat it as half of a 128-bit vector and consider replicated
        // patterns. Scalar MOVI might also be an option.
        } else if const_data & (u32::MAX as u64) == 0 {
            // Low 32 bits are zero: a plain integer-side load plus a move to the
            // FP register is cheaper than a constant-pool load.
            let tmp = alloc_tmp(I64);
            let mut insts = Inst::load_constant(tmp, const_data);

            insts.push(Inst::MovToFpu {
                rd,
                rn: tmp.to_reg(),
                size: ScalarSize::Size64,
            });

            insts
        } else {
            smallvec![Inst::LoadFpuConst64 { rd, const_data }]
        }
    }

    /// Create instructions that load a 128-bit vector constant.
    ///
    /// Tries, in order: the 64-bit path (when the upper half is zero), a
    /// replicated-element pattern, and finally a literal-pool load.
    pub fn load_fp_constant128<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        const_data: u128,
        alloc_tmp: F,
    ) -> SmallVec<[Inst; 5]> {
        if let Ok(const_data) = u64::try_from(const_data) {
            SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
        } else if let Some((pattern, size)) =
            Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
        {
            Inst::load_replicated_vector_pattern(
                rd,
                pattern,
                VectorSize::from_lane_size(size, true),
                alloc_tmp,
            )
        } else {
            smallvec![Inst::LoadFpuConst128 { rd, const_data }]
        }
    }

    /// Determine whether a 128-bit constant represents a vector consisting of elements with
    /// the same value.
    ///
    /// Recursively halves the candidate element size, so the smallest
    /// replicated element (and its `ScalarSize`) is returned when one exists;
    /// `None` when the value is not a replicated pattern at `size`.
    pub fn get_replicated_vector_pattern(
        value: u128,
        size: ScalarSize,
    ) -> Option<(u64, ScalarSize)> {
        // `next_size` is the next-smaller lane size to try; Size128 acts as a
        // sentinel that terminates the recursion below Size8.
        let (mask, shift, next_size) = match size {
            ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
            ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
            ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
            ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
            _ => return None,
        };
        let mut r = None;
        let v = value & mask;

        // Comparing only the two lowest lanes suffices here because the caller
        // reaches this size through the 64-bit check, which already covers the
        // upper half of the 128-bit value.
        if (value >> shift) & mask == v {
            r = Inst::get_replicated_vector_pattern(v, next_size);

            if r.is_none() {
                r = Some((v as u64, size));
            }
        }

        r
    }

    /// Create instructions that load a vector constant consisting of elements with
    /// the same value.
    ///
    /// Tries MOVI-style modified immediates (plain and inverted), a widened
    /// 64-bit immediate for 32-bit lanes, an FP modified immediate, and
    /// finally an integer load plus DUP.
    pub fn load_replicated_vector_pattern<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        pattern: u64,
        size: VectorSize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 5]> {
        let lane_size = size.lane_size();
        // A 32-bit lane pattern replicated into both halves of a 64-bit value
        // may be encodable as a 64-bit modified immediate even when the 32-bit
        // form is not.
        let widen_32_bit_pattern = |pattern, lane_size| {
            if lane_size == ScalarSize::Size32 {
                let pattern = pattern as u32 as u64;

                ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64)
            } else {
                None
            }
        };

        if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
            smallvec![Inst::VecDupImm {
                rd,
                imm,
                invert: false,
                size
            }]
        } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
            // The inverted (MVNI) form only exists for 16- and 32-bit lanes.
            debug_assert_ne!(lane_size, ScalarSize::Size8);
            debug_assert_ne!(lane_size, ScalarSize::Size64);

            smallvec![Inst::VecDupImm {
                rd,
                imm,
                invert: true,
                size
            }]
        } else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) {
            let mut insts = smallvec![Inst::VecDupImm {
                rd,
                imm,
                invert: false,
                size: VectorSize::Size64x2,
            }];

            // TODO: Implement support for 64-bit scalar MOVI; we zero-extend the
            // lower 64 bits instead.
            if !size.is_128bits() {
                insts.push(Inst::FpuExtend {
                    rd,
                    rn: rd.to_reg(),
                    size: ScalarSize::Size64,
                });
            }

            insts
        } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
            smallvec![Inst::VecDupFPImm { rd, imm, size }]
        } else {
            // No immediate form applies: materialize in an integer register
            // and broadcast with DUP.
            let tmp = alloc_tmp(I64);
            let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);

            insts.push(Inst::VecDup {
                rd,
                rn: tmp.to_reg(),
                size,
            });

            insts
        }
    }

    /// Generic constructor for a load (zero-extending where appropriate).
    ///
    /// Panics (via `unimplemented!`) for types with no lowering here.
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            B1 | B8 | I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
            B16 | I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            B32 | I32 | R32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            B64 | I64 | R64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            F32 => Inst::FpuLoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            F64 => Inst::FpuLoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() {
                    let bits = ty_bits(ty);
                    let rd = into_reg;

                    // Only 64- and 128-bit vectors are supported.
                    if bits == 128 {
                        Inst::FpuLoad128 { rd, mem, flags }
                    } else {
                        assert_eq!(bits, 64);
                        Inst::FpuLoad64 { rd, mem, flags }
                    }
                } else {
                    unimplemented!("gen_load({})", ty);
                }
            }
        }
    }

    /// Generic constructor for a store.
    ///
    /// Mirrors `gen_load`; panics (via `unimplemented!`) for unsupported types.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            B1 | B8 | I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
            B16 | I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
            B32 | I32 | R32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
            B64 | I64 | R64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
            },
            F32 => Inst::FpuStore32 {
                rd: from_reg,
                mem,
                flags,
            },
            F64 => Inst::FpuStore64 {
                rd: from_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() {
                    let bits = ty_bits(ty);
                    let rd = from_reg;

                    // Only 64- and 128-bit vectors are supported.
                    if bits == 128 {
                        Inst::FpuStore128 { rd, mem, flags }
                    } else {
                        assert_eq!(bits, 64);
                        Inst::FpuStore64 { rd, mem, flags }
                    }
                } else {
                    unimplemented!("gen_store({})", ty);
                }
            }
        }
    }

    /// Generate a LoadAddr instruction (load address of an amode into
    /// register). Elides when possible (when amode is just a register). Returns
    /// destination register: either `rd` or a register directly from the amode.
    pub fn gen_load_addr(rd: Writable<Reg>, mem: AMode) -> (Reg, Option<Inst>) {
        if let Some(r) = mem.is_reg() {
            // The address is already in a register; no instruction needed.
            (r, None)
        } else {
            (rd.to_reg(), Some(Inst::LoadAddr { rd, mem }))
        }
    }
}
1784 
1785 //=============================================================================
1786 // Instructions: get_regs
1787 
memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector)1788 fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) {
1789     match memarg {
1790         &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => {
1791             collector.add_use(reg);
1792         }
1793         &AMode::RegReg(r1, r2, ..)
1794         | &AMode::RegScaled(r1, r2, ..)
1795         | &AMode::RegScaledExtended(r1, r2, ..)
1796         | &AMode::RegExtended(r1, r2, ..) => {
1797             collector.add_use(r1);
1798             collector.add_use(r2);
1799         }
1800         &AMode::Label(..) => {}
1801         &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => {
1802             collector.add_mod(reg);
1803         }
1804         &AMode::FPOffset(..) => {
1805             collector.add_use(fp_reg());
1806         }
1807         &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {
1808             collector.add_use(stack_reg());
1809         }
1810         &AMode::RegOffset(r, ..) => {
1811             collector.add_use(r);
1812         }
1813     }
1814 }
1815 
pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector)1816 fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) {
1817     match pairmemarg {
1818         &PairAMode::SignedOffset(reg, ..) => {
1819             collector.add_use(reg);
1820         }
1821         &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => {
1822             collector.add_mod(reg);
1823         }
1824     }
1825 }
1826 
aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector)1827 fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
1828     match inst {
1829         &Inst::AluRRR { rd, rn, rm, .. } => {
1830             collector.add_def(rd);
1831             collector.add_use(rn);
1832             collector.add_use(rm);
1833         }
1834         &Inst::AluRRRR { rd, rn, rm, ra, .. } => {
1835             collector.add_def(rd);
1836             collector.add_use(rn);
1837             collector.add_use(rm);
1838             collector.add_use(ra);
1839         }
1840         &Inst::AluRRImm12 { rd, rn, .. } => {
1841             collector.add_def(rd);
1842             collector.add_use(rn);
1843         }
1844         &Inst::AluRRImmLogic { rd, rn, .. } => {
1845             collector.add_def(rd);
1846             collector.add_use(rn);
1847         }
1848         &Inst::AluRRImmShift { rd, rn, .. } => {
1849             collector.add_def(rd);
1850             collector.add_use(rn);
1851         }
1852         &Inst::AluRRRShift { rd, rn, rm, .. } => {
1853             collector.add_def(rd);
1854             collector.add_use(rn);
1855             collector.add_use(rm);
1856         }
1857         &Inst::AluRRRExtend { rd, rn, rm, .. } => {
1858             collector.add_def(rd);
1859             collector.add_use(rn);
1860             collector.add_use(rm);
1861         }
1862         &Inst::BitRR { rd, rn, .. } => {
1863             collector.add_def(rd);
1864             collector.add_use(rn);
1865         }
1866         &Inst::ULoad8 { rd, ref mem, .. }
1867         | &Inst::SLoad8 { rd, ref mem, .. }
1868         | &Inst::ULoad16 { rd, ref mem, .. }
1869         | &Inst::SLoad16 { rd, ref mem, .. }
1870         | &Inst::ULoad32 { rd, ref mem, .. }
1871         | &Inst::SLoad32 { rd, ref mem, .. }
1872         | &Inst::ULoad64 { rd, ref mem, .. } => {
1873             collector.add_def(rd);
1874             memarg_regs(mem, collector);
1875         }
1876         &Inst::Store8 { rd, ref mem, .. }
1877         | &Inst::Store16 { rd, ref mem, .. }
1878         | &Inst::Store32 { rd, ref mem, .. }
1879         | &Inst::Store64 { rd, ref mem, .. } => {
1880             collector.add_use(rd);
1881             memarg_regs(mem, collector);
1882         }
1883         &Inst::StoreP64 {
1884             rt, rt2, ref mem, ..
1885         } => {
1886             collector.add_use(rt);
1887             collector.add_use(rt2);
1888             pairmemarg_regs(mem, collector);
1889         }
1890         &Inst::LoadP64 {
1891             rt, rt2, ref mem, ..
1892         } => {
1893             collector.add_def(rt);
1894             collector.add_def(rt2);
1895             pairmemarg_regs(mem, collector);
1896         }
1897         &Inst::Mov64 { rd, rm } => {
1898             collector.add_def(rd);
1899             collector.add_use(rm);
1900         }
1901         &Inst::Mov32 { rd, rm } => {
1902             collector.add_def(rd);
1903             collector.add_use(rm);
1904         }
1905         &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => {
1906             collector.add_def(rd);
1907         }
1908         &Inst::MovK { rd, .. } => {
1909             collector.add_mod(rd);
1910         }
1911         &Inst::CSel { rd, rn, rm, .. } => {
1912             collector.add_def(rd);
1913             collector.add_use(rn);
1914             collector.add_use(rm);
1915         }
1916         &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
1917             collector.add_def(rd);
1918         }
1919         &Inst::CCmpImm { rn, .. } => {
1920             collector.add_use(rn);
1921         }
1922         &Inst::AtomicRMW { .. } => {
1923             collector.add_use(xreg(25));
1924             collector.add_use(xreg(26));
1925             collector.add_def(writable_xreg(24));
1926             collector.add_def(writable_xreg(27));
1927             collector.add_def(writable_xreg(28));
1928         }
1929         &Inst::AtomicCAS { rs, rt, rn, .. } => {
1930             collector.add_mod(rs);
1931             collector.add_use(rt);
1932             collector.add_use(rn);
1933         }
1934         &Inst::AtomicCASLoop { .. } => {
1935             collector.add_use(xreg(25));
1936             collector.add_use(xreg(26));
1937             collector.add_use(xreg(28));
1938             collector.add_def(writable_xreg(24));
1939             collector.add_def(writable_xreg(27));
1940         }
1941         &Inst::LoadAcquire { rt, rn, .. } => {
1942             collector.add_use(rn);
1943             collector.add_def(rt);
1944         }
1945         &Inst::StoreRelease { rt, rn, .. } => {
1946             collector.add_use(rn);
1947             collector.add_use(rt);
1948         }
1949         &Inst::Fence {} => {}
1950         &Inst::FpuMove64 { rd, rn } => {
1951             collector.add_def(rd);
1952             collector.add_use(rn);
1953         }
1954         &Inst::FpuMove128 { rd, rn } => {
1955             collector.add_def(rd);
1956             collector.add_use(rn);
1957         }
1958         &Inst::FpuMoveFromVec { rd, rn, .. } => {
1959             collector.add_def(rd);
1960             collector.add_use(rn);
1961         }
1962         &Inst::FpuExtend { rd, rn, .. } => {
1963             collector.add_def(rd);
1964             collector.add_use(rn);
1965         }
1966         &Inst::FpuRR { rd, rn, .. } => {
1967             collector.add_def(rd);
1968             collector.add_use(rn);
1969         }
1970         &Inst::FpuRRR { rd, rn, rm, .. } => {
1971             collector.add_def(rd);
1972             collector.add_use(rn);
1973             collector.add_use(rm);
1974         }
1975         &Inst::FpuRRI { fpu_op, rd, rn, .. } => {
1976             match fpu_op {
1977                 FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd),
1978                 FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd),
1979             }
1980             collector.add_use(rn);
1981         }
1982         &Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
1983             collector.add_def(rd);
1984             collector.add_use(rn);
1985             collector.add_use(rm);
1986             collector.add_use(ra);
1987         }
1988         &Inst::VecMisc { rd, rn, .. } => {
1989             collector.add_def(rd);
1990             collector.add_use(rn);
1991         }
1992 
1993         &Inst::VecLanes { rd, rn, .. } => {
1994             collector.add_def(rd);
1995             collector.add_use(rn);
1996         }
1997         &Inst::VecShiftImm { rd, rn, .. } => {
1998             collector.add_def(rd);
1999             collector.add_use(rn);
2000         }
2001         &Inst::VecExtract { rd, rn, rm, .. } => {
2002             collector.add_def(rd);
2003             collector.add_use(rn);
2004             collector.add_use(rm);
2005         }
2006         &Inst::VecTbl {
2007             rd,
2008             rn,
2009             rm,
2010             is_extension,
2011         } => {
2012             collector.add_use(rn);
2013             collector.add_use(rm);
2014 
2015             if is_extension {
2016                 collector.add_mod(rd);
2017             } else {
2018                 collector.add_def(rd);
2019             }
2020         }
2021         &Inst::VecTbl2 {
2022             rd,
2023             rn,
2024             rn2,
2025             rm,
2026             is_extension,
2027         } => {
2028             collector.add_use(rn);
2029             collector.add_use(rn2);
2030             collector.add_use(rm);
2031 
2032             if is_extension {
2033                 collector.add_mod(rd);
2034             } else {
2035                 collector.add_def(rd);
2036             }
2037         }
2038         &Inst::VecLoadReplicate { rd, rn, .. } => {
2039             collector.add_def(rd);
2040             collector.add_use(rn);
2041         }
2042         &Inst::VecCSel { rd, rn, rm, .. } => {
2043             collector.add_def(rd);
2044             collector.add_use(rn);
2045             collector.add_use(rm);
2046         }
2047         &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
2048             collector.add_use(rn);
2049             collector.add_use(rm);
2050         }
2051         &Inst::FpuLoad32 { rd, ref mem, .. } => {
2052             collector.add_def(rd);
2053             memarg_regs(mem, collector);
2054         }
2055         &Inst::FpuLoad64 { rd, ref mem, .. } => {
2056             collector.add_def(rd);
2057             memarg_regs(mem, collector);
2058         }
2059         &Inst::FpuLoad128 { rd, ref mem, .. } => {
2060             collector.add_def(rd);
2061             memarg_regs(mem, collector);
2062         }
2063         &Inst::FpuStore32 { rd, ref mem, .. } => {
2064             collector.add_use(rd);
2065             memarg_regs(mem, collector);
2066         }
2067         &Inst::FpuStore64 { rd, ref mem, .. } => {
2068             collector.add_use(rd);
2069             memarg_regs(mem, collector);
2070         }
2071         &Inst::FpuStore128 { rd, ref mem, .. } => {
2072             collector.add_use(rd);
2073             memarg_regs(mem, collector);
2074         }
2075         &Inst::FpuLoadP64 {
2076             rt, rt2, ref mem, ..
2077         } => {
2078             collector.add_def(rt);
2079             collector.add_def(rt2);
2080             pairmemarg_regs(mem, collector);
2081         }
2082         &Inst::FpuStoreP64 {
2083             rt, rt2, ref mem, ..
2084         } => {
2085             collector.add_use(rt);
2086             collector.add_use(rt2);
2087             pairmemarg_regs(mem, collector);
2088         }
2089         &Inst::FpuLoadP128 {
2090             rt, rt2, ref mem, ..
2091         } => {
2092             collector.add_def(rt);
2093             collector.add_def(rt2);
2094             pairmemarg_regs(mem, collector);
2095         }
2096         &Inst::FpuStoreP128 {
2097             rt, rt2, ref mem, ..
2098         } => {
2099             collector.add_use(rt);
2100             collector.add_use(rt2);
2101             pairmemarg_regs(mem, collector);
2102         }
2103         &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
2104             collector.add_def(rd);
2105         }
2106         &Inst::FpuToInt { rd, rn, .. } => {
2107             collector.add_def(rd);
2108             collector.add_use(rn);
2109         }
2110         &Inst::IntToFpu { rd, rn, .. } => {
2111             collector.add_def(rd);
2112             collector.add_use(rn);
2113         }
2114         &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => {
2115             collector.add_def(rd);
2116             collector.add_use(rn);
2117             collector.add_use(rm);
2118         }
2119         &Inst::FpuRound { rd, rn, .. } => {
2120             collector.add_def(rd);
2121             collector.add_use(rn);
2122         }
2123         &Inst::MovToFpu { rd, rn, .. } => {
2124             collector.add_def(rd);
2125             collector.add_use(rn);
2126         }
2127         &Inst::MovToVec { rd, rn, .. } => {
2128             collector.add_mod(rd);
2129             collector.add_use(rn);
2130         }
2131         &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
2132             collector.add_def(rd);
2133             collector.add_use(rn);
2134         }
2135         &Inst::VecDup { rd, rn, .. } => {
2136             collector.add_def(rd);
2137             collector.add_use(rn);
2138         }
2139         &Inst::VecDupFromFpu { rd, rn, .. } => {
2140             collector.add_def(rd);
2141             collector.add_use(rn);
2142         }
2143         &Inst::VecDupFPImm { rd, .. } => {
2144             collector.add_def(rd);
2145         }
2146         &Inst::VecDupImm { rd, .. } => {
2147             collector.add_def(rd);
2148         }
2149         &Inst::VecExtend { rd, rn, .. } => {
2150             collector.add_def(rd);
2151             collector.add_use(rn);
2152         }
2153         &Inst::VecMovElement { rd, rn, .. } => {
2154             collector.add_mod(rd);
2155             collector.add_use(rn);
2156         }
2157         &Inst::VecRRLong { rd, rn, .. } => {
2158             collector.add_def(rd);
2159             collector.add_use(rn);
2160         }
2161         &Inst::VecRRNarrow {
2162             rd, rn, high_half, ..
2163         } => {
2164             collector.add_use(rn);
2165 
2166             if high_half {
2167                 collector.add_mod(rd);
2168             } else {
2169                 collector.add_def(rd);
2170             }
2171         }
2172         &Inst::VecRRPair { rd, rn, .. } => {
2173             collector.add_def(rd);
2174             collector.add_use(rn);
2175         }
2176         &Inst::VecRRRLong {
2177             alu_op, rd, rn, rm, ..
2178         } => {
2179             match alu_op {
2180                 VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
2181                     collector.add_mod(rd)
2182                 }
2183                 _ => collector.add_def(rd),
2184             };
2185             collector.add_use(rn);
2186             collector.add_use(rm);
2187         }
2188         &Inst::VecRRPairLong { rd, rn, .. } => {
2189             collector.add_def(rd);
2190             collector.add_use(rn);
2191         }
2192         &Inst::VecRRR {
2193             alu_op, rd, rn, rm, ..
2194         } => {
2195             if alu_op == VecALUOp::Bsl {
2196                 collector.add_mod(rd);
2197             } else {
2198                 collector.add_def(rd);
2199             }
2200             collector.add_use(rn);
2201             collector.add_use(rm);
2202         }
2203         &Inst::MovToNZCV { rn } => {
2204             collector.add_use(rn);
2205         }
2206         &Inst::MovFromNZCV { rd } => {
2207             collector.add_def(rd);
2208         }
2209         &Inst::Extend { rd, rn, .. } => {
2210             collector.add_def(rd);
2211             collector.add_use(rn);
2212         }
2213         &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {}
2214         &Inst::Call { ref info, .. } => {
2215             collector.add_uses(&*info.uses);
2216             collector.add_defs(&*info.defs);
2217         }
2218         &Inst::CallInd { ref info, .. } => {
2219             collector.add_uses(&*info.uses);
2220             collector.add_defs(&*info.defs);
2221             collector.add_use(info.rn);
2222         }
2223         &Inst::CondBr { ref kind, .. } => match kind {
2224             CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
2225                 collector.add_use(*rt);
2226             }
2227             CondBrKind::Cond(_) => {}
2228         },
2229         &Inst::IndirectBr { rn, .. } => {
2230             collector.add_use(rn);
2231         }
2232         &Inst::Nop0 | Inst::Nop4 => {}
2233         &Inst::Brk => {}
2234         &Inst::Udf { .. } => {}
2235         &Inst::TrapIf { ref kind, .. } => match kind {
2236             CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
2237                 collector.add_use(*rt);
2238             }
2239             CondBrKind::Cond(_) => {}
2240         },
2241         &Inst::Adr { rd, .. } => {
2242             collector.add_def(rd);
2243         }
2244         &Inst::Word4 { .. } | &Inst::Word8 { .. } => {}
2245         &Inst::JTSequence {
2246             ridx, rtmp1, rtmp2, ..
2247         } => {
2248             collector.add_use(ridx);
2249             collector.add_def(rtmp1);
2250             collector.add_def(rtmp2);
2251         }
2252         &Inst::LoadExtName { rd, .. } => {
2253             collector.add_def(rd);
2254         }
2255         &Inst::LoadAddr { rd, ref mem } => {
2256             collector.add_def(rd);
2257             memarg_regs(mem, collector);
2258         }
2259         &Inst::VirtualSPOffsetAdj { .. } => {}
2260         &Inst::ValueLabelMarker { reg, .. } => {
2261             collector.add_use(reg);
2262         }
2263 
2264         &Inst::ElfTlsGetAddr { .. } => {
2265             for reg in AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV) {
2266                 collector.add_def(reg);
2267             }
2268         }
2269         &Inst::Unwind { .. } => {}
2270         &Inst::EmitIsland { .. } => {}
2271     }
2272 }
2273 
2274 //=============================================================================
2275 // Instructions: map_regs
2276 
aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM)2277 fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
2278     fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
2279         if r.is_virtual() {
2280             let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
2281             *r = new;
2282         }
2283     }
2284 
2285     fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
2286         if r.to_reg().is_virtual() {
2287             let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
2288             *r = Writable::from_reg(new);
2289         }
2290     }
2291 
2292     fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
2293         if r.to_reg().is_virtual() {
2294             let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
2295             *r = Writable::from_reg(new);
2296         }
2297     }
2298 
2299     fn map_mem<RUM: RegUsageMapper>(m: &RUM, mem: &mut AMode) {
2300         // N.B.: we take only the pre-map here, but this is OK because the
2301         // only addressing modes that update registers (pre/post-increment on
2302         // AArch64) both read and write registers, so they are "mods" rather
2303         // than "defs", so must be the same in both the pre- and post-map.
2304         match mem {
2305             &mut AMode::Unscaled(ref mut reg, ..) => map_use(m, reg),
2306             &mut AMode::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
2307             &mut AMode::RegReg(ref mut r1, ref mut r2)
2308             | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..)
2309             | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..)
2310             | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => {
2311                 map_use(m, r1);
2312                 map_use(m, r2);
2313             }
2314             &mut AMode::Label(..) => {}
2315             &mut AMode::PreIndexed(ref mut r, ..) => map_mod(m, r),
2316             &mut AMode::PostIndexed(ref mut r, ..) => map_mod(m, r),
2317             &mut AMode::FPOffset(..)
2318             | &mut AMode::SPOffset(..)
2319             | &mut AMode::NominalSPOffset(..) => {}
2320             &mut AMode::RegOffset(ref mut r, ..) => map_use(m, r),
2321         };
2322     }
2323 
2324     fn map_pairmem<RUM: RegUsageMapper>(m: &RUM, mem: &mut PairAMode) {
2325         match mem {
2326             &mut PairAMode::SignedOffset(ref mut reg, ..) => map_use(m, reg),
2327             &mut PairAMode::PreIndexed(ref mut reg, ..) => map_def(m, reg),
2328             &mut PairAMode::PostIndexed(ref mut reg, ..) => map_def(m, reg),
2329         }
2330     }
2331 
2332     fn map_br<RUM: RegUsageMapper>(m: &RUM, br: &mut CondBrKind) {
2333         match br {
2334             &mut CondBrKind::Zero(ref mut reg) => map_use(m, reg),
2335             &mut CondBrKind::NotZero(ref mut reg) => map_use(m, reg),
2336             &mut CondBrKind::Cond(..) => {}
2337         };
2338     }
2339 
2340     match inst {
2341         &mut Inst::AluRRR {
2342             ref mut rd,
2343             ref mut rn,
2344             ref mut rm,
2345             ..
2346         } => {
2347             map_def(mapper, rd);
2348             map_use(mapper, rn);
2349             map_use(mapper, rm);
2350         }
2351         &mut Inst::AluRRRR {
2352             ref mut rd,
2353             ref mut rn,
2354             ref mut rm,
2355             ref mut ra,
2356             ..
2357         } => {
2358             map_def(mapper, rd);
2359             map_use(mapper, rn);
2360             map_use(mapper, rm);
2361             map_use(mapper, ra);
2362         }
2363         &mut Inst::AluRRImm12 {
2364             ref mut rd,
2365             ref mut rn,
2366             ..
2367         } => {
2368             map_def(mapper, rd);
2369             map_use(mapper, rn);
2370         }
2371         &mut Inst::AluRRImmLogic {
2372             ref mut rd,
2373             ref mut rn,
2374             ..
2375         } => {
2376             map_def(mapper, rd);
2377             map_use(mapper, rn);
2378         }
2379         &mut Inst::AluRRImmShift {
2380             ref mut rd,
2381             ref mut rn,
2382             ..
2383         } => {
2384             map_def(mapper, rd);
2385             map_use(mapper, rn);
2386         }
2387         &mut Inst::AluRRRShift {
2388             ref mut rd,
2389             ref mut rn,
2390             ref mut rm,
2391             ..
2392         } => {
2393             map_def(mapper, rd);
2394             map_use(mapper, rn);
2395             map_use(mapper, rm);
2396         }
2397         &mut Inst::AluRRRExtend {
2398             ref mut rd,
2399             ref mut rn,
2400             ref mut rm,
2401             ..
2402         } => {
2403             map_def(mapper, rd);
2404             map_use(mapper, rn);
2405             map_use(mapper, rm);
2406         }
2407         &mut Inst::BitRR {
2408             ref mut rd,
2409             ref mut rn,
2410             ..
2411         } => {
2412             map_def(mapper, rd);
2413             map_use(mapper, rn);
2414         }
2415         &mut Inst::ULoad8 {
2416             ref mut rd,
2417             ref mut mem,
2418             ..
2419         } => {
2420             map_def(mapper, rd);
2421             map_mem(mapper, mem);
2422         }
2423         &mut Inst::SLoad8 {
2424             ref mut rd,
2425             ref mut mem,
2426             ..
2427         } => {
2428             map_def(mapper, rd);
2429             map_mem(mapper, mem);
2430         }
2431         &mut Inst::ULoad16 {
2432             ref mut rd,
2433             ref mut mem,
2434             ..
2435         } => {
2436             map_def(mapper, rd);
2437             map_mem(mapper, mem);
2438         }
2439         &mut Inst::SLoad16 {
2440             ref mut rd,
2441             ref mut mem,
2442             ..
2443         } => {
2444             map_def(mapper, rd);
2445             map_mem(mapper, mem);
2446         }
2447         &mut Inst::ULoad32 {
2448             ref mut rd,
2449             ref mut mem,
2450             ..
2451         } => {
2452             map_def(mapper, rd);
2453             map_mem(mapper, mem);
2454         }
2455         &mut Inst::SLoad32 {
2456             ref mut rd,
2457             ref mut mem,
2458             ..
2459         } => {
2460             map_def(mapper, rd);
2461             map_mem(mapper, mem);
2462         }
2463 
2464         &mut Inst::ULoad64 {
2465             ref mut rd,
2466             ref mut mem,
2467             ..
2468         } => {
2469             map_def(mapper, rd);
2470             map_mem(mapper, mem);
2471         }
2472         &mut Inst::Store8 {
2473             ref mut rd,
2474             ref mut mem,
2475             ..
2476         } => {
2477             map_use(mapper, rd);
2478             map_mem(mapper, mem);
2479         }
2480         &mut Inst::Store16 {
2481             ref mut rd,
2482             ref mut mem,
2483             ..
2484         } => {
2485             map_use(mapper, rd);
2486             map_mem(mapper, mem);
2487         }
2488         &mut Inst::Store32 {
2489             ref mut rd,
2490             ref mut mem,
2491             ..
2492         } => {
2493             map_use(mapper, rd);
2494             map_mem(mapper, mem);
2495         }
2496         &mut Inst::Store64 {
2497             ref mut rd,
2498             ref mut mem,
2499             ..
2500         } => {
2501             map_use(mapper, rd);
2502             map_mem(mapper, mem);
2503         }
2504 
2505         &mut Inst::StoreP64 {
2506             ref mut rt,
2507             ref mut rt2,
2508             ref mut mem,
2509             ..
2510         } => {
2511             map_use(mapper, rt);
2512             map_use(mapper, rt2);
2513             map_pairmem(mapper, mem);
2514         }
2515         &mut Inst::LoadP64 {
2516             ref mut rt,
2517             ref mut rt2,
2518             ref mut mem,
2519             ..
2520         } => {
2521             map_def(mapper, rt);
2522             map_def(mapper, rt2);
2523             map_pairmem(mapper, mem);
2524         }
2525         &mut Inst::Mov64 {
2526             ref mut rd,
2527             ref mut rm,
2528         } => {
2529             map_def(mapper, rd);
2530             map_use(mapper, rm);
2531         }
2532         &mut Inst::Mov32 {
2533             ref mut rd,
2534             ref mut rm,
2535         } => {
2536             map_def(mapper, rd);
2537             map_use(mapper, rm);
2538         }
2539         &mut Inst::MovZ { ref mut rd, .. } => {
2540             map_def(mapper, rd);
2541         }
2542         &mut Inst::MovN { ref mut rd, .. } => {
2543             map_def(mapper, rd);
2544         }
2545         &mut Inst::MovK { ref mut rd, .. } => {
2546             map_def(mapper, rd);
2547         }
2548         &mut Inst::CSel {
2549             ref mut rd,
2550             ref mut rn,
2551             ref mut rm,
2552             ..
2553         } => {
2554             map_def(mapper, rd);
2555             map_use(mapper, rn);
2556             map_use(mapper, rm);
2557         }
2558         &mut Inst::CSet { ref mut rd, .. } | &mut Inst::CSetm { ref mut rd, .. } => {
2559             map_def(mapper, rd);
2560         }
2561         &mut Inst::CCmpImm { ref mut rn, .. } => {
2562             map_use(mapper, rn);
2563         }
2564         &mut Inst::AtomicRMW { .. } => {
2565             // There are no vregs to map in this insn.
2566         }
2567         &mut Inst::AtomicCAS {
2568             ref mut rs,
2569             ref mut rt,
2570             ref mut rn,
2571             ..
2572         } => {
2573             map_mod(mapper, rs);
2574             map_use(mapper, rt);
2575             map_use(mapper, rn);
2576         }
2577         &mut Inst::AtomicCASLoop { .. } => {
2578             // There are no vregs to map in this insn.
2579         }
2580         &mut Inst::LoadAcquire {
2581             ref mut rt,
2582             ref mut rn,
2583             ..
2584         } => {
2585             map_def(mapper, rt);
2586             map_use(mapper, rn);
2587         }
2588         &mut Inst::StoreRelease {
2589             ref mut rt,
2590             ref mut rn,
2591             ..
2592         } => {
2593             map_use(mapper, rt);
2594             map_use(mapper, rn);
2595         }
2596         &mut Inst::Fence {} => {}
2597         &mut Inst::FpuMove64 {
2598             ref mut rd,
2599             ref mut rn,
2600         } => {
2601             map_def(mapper, rd);
2602             map_use(mapper, rn);
2603         }
2604         &mut Inst::FpuMove128 {
2605             ref mut rd,
2606             ref mut rn,
2607         } => {
2608             map_def(mapper, rd);
2609             map_use(mapper, rn);
2610         }
2611         &mut Inst::FpuMoveFromVec {
2612             ref mut rd,
2613             ref mut rn,
2614             ..
2615         } => {
2616             map_def(mapper, rd);
2617             map_use(mapper, rn);
2618         }
2619         &mut Inst::FpuExtend {
2620             ref mut rd,
2621             ref mut rn,
2622             ..
2623         } => {
2624             map_def(mapper, rd);
2625             map_use(mapper, rn);
2626         }
2627         &mut Inst::FpuRR {
2628             ref mut rd,
2629             ref mut rn,
2630             ..
2631         } => {
2632             map_def(mapper, rd);
2633             map_use(mapper, rn);
2634         }
2635         &mut Inst::FpuRRR {
2636             ref mut rd,
2637             ref mut rn,
2638             ref mut rm,
2639             ..
2640         } => {
2641             map_def(mapper, rd);
2642             map_use(mapper, rn);
2643             map_use(mapper, rm);
2644         }
2645         &mut Inst::FpuRRI {
2646             fpu_op,
2647             ref mut rd,
2648             ref mut rn,
2649             ..
2650         } => {
2651             match fpu_op {
2652                 FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => map_def(mapper, rd),
2653                 FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => map_mod(mapper, rd),
2654             }
2655             map_use(mapper, rn);
2656         }
2657         &mut Inst::FpuRRRR {
2658             ref mut rd,
2659             ref mut rn,
2660             ref mut rm,
2661             ref mut ra,
2662             ..
2663         } => {
2664             map_def(mapper, rd);
2665             map_use(mapper, rn);
2666             map_use(mapper, rm);
2667             map_use(mapper, ra);
2668         }
2669         &mut Inst::VecMisc {
2670             ref mut rd,
2671             ref mut rn,
2672             ..
2673         } => {
2674             map_def(mapper, rd);
2675             map_use(mapper, rn);
2676         }
2677         &mut Inst::VecLanes {
2678             ref mut rd,
2679             ref mut rn,
2680             ..
2681         } => {
2682             map_def(mapper, rd);
2683             map_use(mapper, rn);
2684         }
2685         &mut Inst::VecShiftImm {
2686             ref mut rd,
2687             ref mut rn,
2688             ..
2689         } => {
2690             map_def(mapper, rd);
2691             map_use(mapper, rn);
2692         }
2693         &mut Inst::VecExtract {
2694             ref mut rd,
2695             ref mut rn,
2696             ref mut rm,
2697             ..
2698         } => {
2699             map_def(mapper, rd);
2700             map_use(mapper, rn);
2701             map_use(mapper, rm);
2702         }
2703         &mut Inst::VecTbl {
2704             ref mut rd,
2705             ref mut rn,
2706             ref mut rm,
2707             is_extension,
2708         } => {
2709             map_use(mapper, rn);
2710             map_use(mapper, rm);
2711 
2712             if is_extension {
2713                 map_mod(mapper, rd);
2714             } else {
2715                 map_def(mapper, rd);
2716             }
2717         }
2718         &mut Inst::VecTbl2 {
2719             ref mut rd,
2720             ref mut rn,
2721             ref mut rn2,
2722             ref mut rm,
2723             is_extension,
2724         } => {
2725             map_use(mapper, rn);
2726             map_use(mapper, rn2);
2727             map_use(mapper, rm);
2728 
2729             if is_extension {
2730                 map_mod(mapper, rd);
2731             } else {
2732                 map_def(mapper, rd);
2733             }
2734         }
2735         &mut Inst::VecLoadReplicate {
2736             ref mut rd,
2737             ref mut rn,
2738             ..
2739         } => {
2740             map_def(mapper, rd);
2741             map_use(mapper, rn);
2742         }
2743         &mut Inst::VecCSel {
2744             ref mut rd,
2745             ref mut rn,
2746             ref mut rm,
2747             ..
2748         } => {
2749             map_def(mapper, rd);
2750             map_use(mapper, rn);
2751             map_use(mapper, rm);
2752         }
2753         &mut Inst::FpuCmp32 {
2754             ref mut rn,
2755             ref mut rm,
2756         } => {
2757             map_use(mapper, rn);
2758             map_use(mapper, rm);
2759         }
2760         &mut Inst::FpuCmp64 {
2761             ref mut rn,
2762             ref mut rm,
2763         } => {
2764             map_use(mapper, rn);
2765             map_use(mapper, rm);
2766         }
2767         &mut Inst::FpuLoad32 {
2768             ref mut rd,
2769             ref mut mem,
2770             ..
2771         } => {
2772             map_def(mapper, rd);
2773             map_mem(mapper, mem);
2774         }
2775         &mut Inst::FpuLoad64 {
2776             ref mut rd,
2777             ref mut mem,
2778             ..
2779         } => {
2780             map_def(mapper, rd);
2781             map_mem(mapper, mem);
2782         }
2783         &mut Inst::FpuLoad128 {
2784             ref mut rd,
2785             ref mut mem,
2786             ..
2787         } => {
2788             map_def(mapper, rd);
2789             map_mem(mapper, mem);
2790         }
2791         &mut Inst::FpuStore32 {
2792             ref mut rd,
2793             ref mut mem,
2794             ..
2795         } => {
2796             map_use(mapper, rd);
2797             map_mem(mapper, mem);
2798         }
2799         &mut Inst::FpuStore64 {
2800             ref mut rd,
2801             ref mut mem,
2802             ..
2803         } => {
2804             map_use(mapper, rd);
2805             map_mem(mapper, mem);
2806         }
2807         &mut Inst::FpuStore128 {
2808             ref mut rd,
2809             ref mut mem,
2810             ..
2811         } => {
2812             map_use(mapper, rd);
2813             map_mem(mapper, mem);
2814         }
2815         &mut Inst::FpuLoadP64 {
2816             ref mut rt,
2817             ref mut rt2,
2818             ref mut mem,
2819             ..
2820         } => {
2821             map_def(mapper, rt);
2822             map_def(mapper, rt2);
2823             map_pairmem(mapper, mem);
2824         }
2825         &mut Inst::FpuStoreP64 {
2826             ref mut rt,
2827             ref mut rt2,
2828             ref mut mem,
2829             ..
2830         } => {
2831             map_use(mapper, rt);
2832             map_use(mapper, rt2);
2833             map_pairmem(mapper, mem);
2834         }
2835         &mut Inst::FpuLoadP128 {
2836             ref mut rt,
2837             ref mut rt2,
2838             ref mut mem,
2839             ..
2840         } => {
2841             map_def(mapper, rt);
2842             map_def(mapper, rt2);
2843             map_pairmem(mapper, mem);
2844         }
2845         &mut Inst::FpuStoreP128 {
2846             ref mut rt,
2847             ref mut rt2,
2848             ref mut mem,
2849             ..
2850         } => {
2851             map_use(mapper, rt);
2852             map_use(mapper, rt2);
2853             map_pairmem(mapper, mem);
2854         }
2855         &mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
2856             map_def(mapper, rd);
2857         }
2858         &mut Inst::LoadFpuConst128 { ref mut rd, .. } => {
2859             map_def(mapper, rd);
2860         }
2861         &mut Inst::FpuToInt {
2862             ref mut rd,
2863             ref mut rn,
2864             ..
2865         } => {
2866             map_def(mapper, rd);
2867             map_use(mapper, rn);
2868         }
2869         &mut Inst::IntToFpu {
2870             ref mut rd,
2871             ref mut rn,
2872             ..
2873         } => {
2874             map_def(mapper, rd);
2875             map_use(mapper, rn);
2876         }
2877         &mut Inst::FpuCSel32 {
2878             ref mut rd,
2879             ref mut rn,
2880             ref mut rm,
2881             ..
2882         } => {
2883             map_def(mapper, rd);
2884             map_use(mapper, rn);
2885             map_use(mapper, rm);
2886         }
2887         &mut Inst::FpuCSel64 {
2888             ref mut rd,
2889             ref mut rn,
2890             ref mut rm,
2891             ..
2892         } => {
2893             map_def(mapper, rd);
2894             map_use(mapper, rn);
2895             map_use(mapper, rm);
2896         }
2897         &mut Inst::FpuRound {
2898             ref mut rd,
2899             ref mut rn,
2900             ..
2901         } => {
2902             map_def(mapper, rd);
2903             map_use(mapper, rn);
2904         }
2905         &mut Inst::MovToFpu {
2906             ref mut rd,
2907             ref mut rn,
2908             ..
2909         } => {
2910             map_def(mapper, rd);
2911             map_use(mapper, rn);
2912         }
2913         &mut Inst::MovToVec {
2914             ref mut rd,
2915             ref mut rn,
2916             ..
2917         } => {
2918             map_mod(mapper, rd);
2919             map_use(mapper, rn);
2920         }
2921         &mut Inst::MovFromVec {
2922             ref mut rd,
2923             ref mut rn,
2924             ..
2925         }
2926         | &mut Inst::MovFromVecSigned {
2927             ref mut rd,
2928             ref mut rn,
2929             ..
2930         } => {
2931             map_def(mapper, rd);
2932             map_use(mapper, rn);
2933         }
2934         &mut Inst::VecDup {
2935             ref mut rd,
2936             ref mut rn,
2937             ..
2938         } => {
2939             map_def(mapper, rd);
2940             map_use(mapper, rn);
2941         }
2942         &mut Inst::VecDupFromFpu {
2943             ref mut rd,
2944             ref mut rn,
2945             ..
2946         } => {
2947             map_def(mapper, rd);
2948             map_use(mapper, rn);
2949         }
2950         &mut Inst::VecDupFPImm { ref mut rd, .. } => {
2951             map_def(mapper, rd);
2952         }
2953         &mut Inst::VecDupImm { ref mut rd, .. } => {
2954             map_def(mapper, rd);
2955         }
2956         &mut Inst::VecExtend {
2957             ref mut rd,
2958             ref mut rn,
2959             ..
2960         } => {
2961             map_def(mapper, rd);
2962             map_use(mapper, rn);
2963         }
2964         &mut Inst::VecMovElement {
2965             ref mut rd,
2966             ref mut rn,
2967             ..
2968         } => {
2969             map_mod(mapper, rd);
2970             map_use(mapper, rn);
2971         }
2972         &mut Inst::VecRRLong {
2973             ref mut rd,
2974             ref mut rn,
2975             ..
2976         } => {
2977             map_def(mapper, rd);
2978             map_use(mapper, rn);
2979         }
2980         &mut Inst::VecRRNarrow {
2981             ref mut rd,
2982             ref mut rn,
2983             high_half,
2984             ..
2985         } => {
2986             map_use(mapper, rn);
2987 
2988             if high_half {
2989                 map_mod(mapper, rd);
2990             } else {
2991                 map_def(mapper, rd);
2992             }
2993         }
2994         &mut Inst::VecRRPair {
2995             ref mut rd,
2996             ref mut rn,
2997             ..
2998         } => {
2999             map_def(mapper, rd);
3000             map_use(mapper, rn);
3001         }
3002         &mut Inst::VecRRRLong {
3003             alu_op,
3004             ref mut rd,
3005             ref mut rn,
3006             ref mut rm,
3007             ..
3008         } => {
3009             match alu_op {
3010                 VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
3011                     map_mod(mapper, rd)
3012                 }
3013                 _ => map_def(mapper, rd),
3014             };
3015             map_use(mapper, rn);
3016             map_use(mapper, rm);
3017         }
3018         &mut Inst::VecRRPairLong {
3019             ref mut rd,
3020             ref mut rn,
3021             ..
3022         } => {
3023             map_def(mapper, rd);
3024             map_use(mapper, rn);
3025         }
3026         &mut Inst::VecRRR {
3027             alu_op,
3028             ref mut rd,
3029             ref mut rn,
3030             ref mut rm,
3031             ..
3032         } => {
3033             if alu_op == VecALUOp::Bsl {
3034                 map_mod(mapper, rd);
3035             } else {
3036                 map_def(mapper, rd);
3037             }
3038             map_use(mapper, rn);
3039             map_use(mapper, rm);
3040         }
3041         &mut Inst::MovToNZCV { ref mut rn } => {
3042             map_use(mapper, rn);
3043         }
3044         &mut Inst::MovFromNZCV { ref mut rd } => {
3045             map_def(mapper, rd);
3046         }
3047         &mut Inst::Extend {
3048             ref mut rd,
3049             ref mut rn,
3050             ..
3051         } => {
3052             map_def(mapper, rd);
3053             map_use(mapper, rn);
3054         }
3055         &mut Inst::Jump { .. } => {}
3056         &mut Inst::Call { ref mut info } => {
3057             for r in info.uses.iter_mut() {
3058                 map_use(mapper, r);
3059             }
3060             for r in info.defs.iter_mut() {
3061                 map_def(mapper, r);
3062             }
3063         }
3064         &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
3065         &mut Inst::CallInd { ref mut info, .. } => {
3066             for r in info.uses.iter_mut() {
3067                 map_use(mapper, r);
3068             }
3069             for r in info.defs.iter_mut() {
3070                 map_def(mapper, r);
3071             }
3072             map_use(mapper, &mut info.rn);
3073         }
3074         &mut Inst::CondBr { ref mut kind, .. } => {
3075             map_br(mapper, kind);
3076         }
3077         &mut Inst::IndirectBr { ref mut rn, .. } => {
3078             map_use(mapper, rn);
3079         }
3080         &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {}
3081         &mut Inst::TrapIf { ref mut kind, .. } => {
3082             map_br(mapper, kind);
3083         }
3084         &mut Inst::Adr { ref mut rd, .. } => {
3085             map_def(mapper, rd);
3086         }
3087         &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {}
3088         &mut Inst::JTSequence {
3089             ref mut ridx,
3090             ref mut rtmp1,
3091             ref mut rtmp2,
3092             ..
3093         } => {
3094             map_use(mapper, ridx);
3095             map_def(mapper, rtmp1);
3096             map_def(mapper, rtmp2);
3097         }
3098         &mut Inst::LoadExtName { ref mut rd, .. } => {
3099             map_def(mapper, rd);
3100         }
3101         &mut Inst::LoadAddr {
3102             ref mut rd,
3103             ref mut mem,
3104         } => {
3105             map_def(mapper, rd);
3106             map_mem(mapper, mem);
3107         }
3108         &mut Inst::VirtualSPOffsetAdj { .. } => {}
3109         &mut Inst::EmitIsland { .. } => {}
3110         &mut Inst::ElfTlsGetAddr { .. } => {}
3111         &mut Inst::ValueLabelMarker { ref mut reg, .. } => {
3112             map_use(mapper, reg);
3113         }
3114         &mut Inst::Unwind { .. } => {}
3115     }
3116 }
3117 
3118 //=============================================================================
3119 // Instructions: misc functions and external interface
3120 
3121 impl MachInst for Inst {
3122     type LabelUse = LabelUse;
3123 
get_regs(&self, collector: &mut RegUsageCollector)3124     fn get_regs(&self, collector: &mut RegUsageCollector) {
3125         aarch64_get_regs(self, collector)
3126     }
3127 
map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM)3128     fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
3129         aarch64_map_regs(self, mapper);
3130     }
3131 
is_move(&self) -> Option<(Writable<Reg>, Reg)>3132     fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
3133         match self {
3134             &Inst::Mov64 { rd, rm } => Some((rd, rm)),
3135             &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
3136             &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
3137             _ => None,
3138         }
3139     }
3140 
is_epilogue_placeholder(&self) -> bool3141     fn is_epilogue_placeholder(&self) -> bool {
3142         if let Inst::EpiloguePlaceholder = self {
3143             true
3144         } else {
3145             false
3146         }
3147     }
3148 
is_included_in_clobbers(&self) -> bool3149     fn is_included_in_clobbers(&self) -> bool {
3150         // We exclude call instructions from the clobber-set when they are calls
3151         // from caller to callee with the same ABI. Such calls cannot possibly
3152         // force any new registers to be saved in the prologue, because anything
3153         // that the callee clobbers, the caller is also allowed to clobber. This
3154         // both saves work and enables us to more precisely follow the
3155         // half-caller-save, half-callee-save SysV ABI for some vector
3156         // registers.
3157         //
3158         // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
3159         // more information on this ABI-implementation hack.
3160         match self {
3161             &Inst::Call { ref info } => info.caller_callconv != info.callee_callconv,
3162             &Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv,
3163             _ => true,
3164         }
3165     }
3166 
is_term<'a>(&'a self) -> MachTerminator<'a>3167     fn is_term<'a>(&'a self) -> MachTerminator<'a> {
3168         match self {
3169             &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
3170             &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
3171             &Inst::CondBr {
3172                 taken, not_taken, ..
3173             } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
3174             &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]),
3175             &Inst::JTSequence { ref info, .. } => {
3176                 MachTerminator::Indirect(&info.targets_for_term[..])
3177             }
3178             _ => MachTerminator::None,
3179         }
3180     }
3181 
gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst3182     fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
3183         let bits = ty.bits();
3184 
3185         assert!(bits <= 128);
3186         assert!(to_reg.to_reg().get_class() == from_reg.get_class());
3187 
3188         if from_reg.get_class() == RegClass::I64 {
3189             Inst::Mov64 {
3190                 rd: to_reg,
3191                 rm: from_reg,
3192             }
3193         } else if from_reg.get_class() == RegClass::V128 {
3194             if bits > 64 {
3195                 Inst::FpuMove128 {
3196                     rd: to_reg,
3197                     rn: from_reg,
3198                 }
3199             } else {
3200                 Inst::FpuMove64 {
3201                     rd: to_reg,
3202                     rn: from_reg,
3203                 }
3204             }
3205         } else {
3206             panic!("Unexpected register class: {:?}", from_reg.get_class());
3207         }
3208     }
3209 
gen_constant<F: FnMut(Type) -> Writable<Reg>>( to_regs: ValueRegs<Writable<Reg>>, value: u128, ty: Type, alloc_tmp: F, ) -> SmallVec<[Inst; 4]>3210     fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
3211         to_regs: ValueRegs<Writable<Reg>>,
3212         value: u128,
3213         ty: Type,
3214         alloc_tmp: F,
3215     ) -> SmallVec<[Inst; 4]> {
3216         let to_reg = to_regs.only_reg();
3217         match ty {
3218             F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
3219             F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
3220             B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
3221                 Inst::load_constant(to_reg.unwrap(), value as u64)
3222             }
3223             I128 => Inst::load_constant128(to_regs, value),
3224             _ => panic!("Cannot generate constant for type: {}", ty),
3225         }
3226     }
3227 
gen_nop(preferred_size: usize) -> Inst3228     fn gen_nop(preferred_size: usize) -> Inst {
3229         if preferred_size == 0 {
3230             return Inst::Nop0;
3231         }
3232         // We can't give a NOP (or any insn) < 4 bytes.
3233         assert!(preferred_size >= 4);
3234         Inst::Nop4
3235     }
3236 
maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst>3237     fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
3238         None
3239     }
3240 
rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>3241     fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
3242         match ty {
3243             I8 => Ok((&[RegClass::I64], &[I8])),
3244             I16 => Ok((&[RegClass::I64], &[I16])),
3245             I32 => Ok((&[RegClass::I64], &[I32])),
3246             I64 => Ok((&[RegClass::I64], &[I64])),
3247             B1 => Ok((&[RegClass::I64], &[B1])),
3248             B8 => Ok((&[RegClass::I64], &[B8])),
3249             B16 => Ok((&[RegClass::I64], &[B16])),
3250             B32 => Ok((&[RegClass::I64], &[B32])),
3251             B64 => Ok((&[RegClass::I64], &[B64])),
3252             R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
3253             R64 => Ok((&[RegClass::I64], &[R64])),
3254             F32 => Ok((&[RegClass::V128], &[F32])),
3255             F64 => Ok((&[RegClass::V128], &[F64])),
3256             I128 => Ok((&[RegClass::I64, RegClass::I64], &[I64, I64])),
3257             B128 => Ok((&[RegClass::I64, RegClass::I64], &[B64, B64])),
3258             _ if ty.is_vector() => {
3259                 assert!(ty.bits() <= 128);
3260                 Ok((&[RegClass::V128], &[I8X16]))
3261             }
3262             IFLAGS | FFLAGS => Ok((&[RegClass::I64], &[I64])),
3263             _ => Err(CodegenError::Unsupported(format!(
3264                 "Unexpected SSA-value type: {}",
3265                 ty
3266             ))),
3267         }
3268     }
3269 
gen_jump(target: MachLabel) -> Inst3270     fn gen_jump(target: MachLabel) -> Inst {
3271         Inst::Jump {
3272             dest: BranchTarget::Label(target),
3273         }
3274     }
3275 
reg_universe(flags: &settings::Flags) -> RealRegUniverse3276     fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
3277         create_reg_universe(flags)
3278     }
3279 
worst_case_size() -> CodeOffset3280     fn worst_case_size() -> CodeOffset {
3281         // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
3282         // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
3283         // 64-bit f64 constants.
3284         //
3285         // Note that inline jump-tables handle island/pool insertion separately, so we do not need
3286         // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
3287         // feasible for other reasons).
3288         44
3289     }
3290 
ref_type_regclass(_: &settings::Flags) -> RegClass3291     fn ref_type_regclass(_: &settings::Flags) -> RegClass {
3292         RegClass::I64
3293     }
3294 
gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self3295     fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self {
3296         Inst::ValueLabelMarker { label, reg }
3297     }
3298 
defines_value_label(&self) -> Option<(ValueLabel, Reg)>3299     fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
3300         match self {
3301             Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)),
3302             _ => None,
3303         }
3304     }
3305 }
3306 
3307 //=============================================================================
3308 // Pretty-printing of instructions.
3309 
mem_finalize_for_show( mem: &AMode, mb_rru: Option<&RealRegUniverse>, state: &EmitState, ) -> (String, AMode)3310 fn mem_finalize_for_show(
3311     mem: &AMode,
3312     mb_rru: Option<&RealRegUniverse>,
3313     state: &EmitState,
3314 ) -> (String, AMode) {
3315     let (mem_insts, mem) = mem_finalize(0, mem, state);
3316     let mut mem_str = mem_insts
3317         .into_iter()
3318         .map(|inst| inst.show_rru(mb_rru))
3319         .collect::<Vec<_>>()
3320         .join(" ; ");
3321     if !mem_str.is_empty() {
3322         mem_str += " ; ";
3323     }
3324 
3325     (mem_str, mem)
3326 }
3327 
3328 impl PrettyPrint for Inst {
show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String3329     fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
3330         self.pretty_print(mb_rru, &mut EmitState::default())
3331     }
3332 }
3333 
3334 impl Inst {
print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String3335     fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
3336         fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) {
3337             match alu_op {
3338                 ALUOp::Add32 => ("add", OperandSize::Size32),
3339                 ALUOp::Add64 => ("add", OperandSize::Size64),
3340                 ALUOp::Sub32 => ("sub", OperandSize::Size32),
3341                 ALUOp::Sub64 => ("sub", OperandSize::Size64),
3342                 ALUOp::Orr32 => ("orr", OperandSize::Size32),
3343                 ALUOp::Orr64 => ("orr", OperandSize::Size64),
3344                 ALUOp::And32 => ("and", OperandSize::Size32),
3345                 ALUOp::And64 => ("and", OperandSize::Size64),
3346                 ALUOp::AndS32 => ("ands", OperandSize::Size32),
3347                 ALUOp::AndS64 => ("ands", OperandSize::Size64),
3348                 ALUOp::Eor32 => ("eor", OperandSize::Size32),
3349                 ALUOp::Eor64 => ("eor", OperandSize::Size64),
3350                 ALUOp::AddS32 => ("adds", OperandSize::Size32),
3351                 ALUOp::AddS64 => ("adds", OperandSize::Size64),
3352                 ALUOp::SubS32 => ("subs", OperandSize::Size32),
3353                 ALUOp::SubS64 => ("subs", OperandSize::Size64),
3354                 ALUOp::SMulH => ("smulh", OperandSize::Size64),
3355                 ALUOp::UMulH => ("umulh", OperandSize::Size64),
3356                 ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
3357                 ALUOp::UDiv64 => ("udiv", OperandSize::Size64),
3358                 ALUOp::AndNot32 => ("bic", OperandSize::Size32),
3359                 ALUOp::AndNot64 => ("bic", OperandSize::Size64),
3360                 ALUOp::OrrNot32 => ("orn", OperandSize::Size32),
3361                 ALUOp::OrrNot64 => ("orn", OperandSize::Size64),
3362                 ALUOp::EorNot32 => ("eon", OperandSize::Size32),
3363                 ALUOp::EorNot64 => ("eon", OperandSize::Size64),
3364                 ALUOp::RotR32 => ("ror", OperandSize::Size32),
3365                 ALUOp::RotR64 => ("ror", OperandSize::Size64),
3366                 ALUOp::Lsr32 => ("lsr", OperandSize::Size32),
3367                 ALUOp::Lsr64 => ("lsr", OperandSize::Size64),
3368                 ALUOp::Asr32 => ("asr", OperandSize::Size32),
3369                 ALUOp::Asr64 => ("asr", OperandSize::Size64),
3370                 ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
3371                 ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
3372                 ALUOp::Adc32 => ("adc", OperandSize::Size32),
3373                 ALUOp::Adc64 => ("adc", OperandSize::Size64),
3374                 ALUOp::AdcS32 => ("adcs", OperandSize::Size32),
3375                 ALUOp::AdcS64 => ("adcs", OperandSize::Size64),
3376                 ALUOp::Sbc32 => ("sbc", OperandSize::Size32),
3377                 ALUOp::Sbc64 => ("sbc", OperandSize::Size64),
3378                 ALUOp::SbcS32 => ("sbcs", OperandSize::Size32),
3379                 ALUOp::SbcS64 => ("sbcs", OperandSize::Size64),
3380             }
3381         }
3382 
3383         match self {
3384             &Inst::Nop0 => "nop-zero-len".to_string(),
3385             &Inst::Nop4 => "nop".to_string(),
3386             &Inst::AluRRR { alu_op, rd, rn, rm } => {
3387                 let (op, size) = op_name_size(alu_op);
3388                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3389                 let rn = show_ireg_sized(rn, mb_rru, size);
3390                 let rm = show_ireg_sized(rm, mb_rru, size);
3391                 format!("{} {}, {}, {}", op, rd, rn, rm)
3392             }
3393             &Inst::AluRRRR {
3394                 alu_op,
3395                 rd,
3396                 rn,
3397                 rm,
3398                 ra,
3399             } => {
3400                 let (op, size) = match alu_op {
3401                     ALUOp3::MAdd32 => ("madd", OperandSize::Size32),
3402                     ALUOp3::MAdd64 => ("madd", OperandSize::Size64),
3403                     ALUOp3::MSub32 => ("msub", OperandSize::Size32),
3404                     ALUOp3::MSub64 => ("msub", OperandSize::Size64),
3405                 };
3406                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3407                 let rn = show_ireg_sized(rn, mb_rru, size);
3408                 let rm = show_ireg_sized(rm, mb_rru, size);
3409                 let ra = show_ireg_sized(ra, mb_rru, size);
3410 
3411                 format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
3412             }
3413             &Inst::AluRRImm12 {
3414                 alu_op,
3415                 rd,
3416                 rn,
3417                 ref imm12,
3418             } => {
3419                 let (op, size) = op_name_size(alu_op);
3420                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3421                 let rn = show_ireg_sized(rn, mb_rru, size);
3422 
3423                 if imm12.bits == 0 && alu_op == ALUOp::Add64 {
3424                     // special-case MOV (used for moving into SP).
3425                     format!("mov {}, {}", rd, rn)
3426                 } else {
3427                     let imm12 = imm12.show_rru(mb_rru);
3428                     format!("{} {}, {}, {}", op, rd, rn, imm12)
3429                 }
3430             }
3431             &Inst::AluRRImmLogic {
3432                 alu_op,
3433                 rd,
3434                 rn,
3435                 ref imml,
3436             } => {
3437                 let (op, size) = op_name_size(alu_op);
3438                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3439                 let rn = show_ireg_sized(rn, mb_rru, size);
3440                 let imml = imml.show_rru(mb_rru);
3441                 format!("{} {}, {}, {}", op, rd, rn, imml)
3442             }
3443             &Inst::AluRRImmShift {
3444                 alu_op,
3445                 rd,
3446                 rn,
3447                 ref immshift,
3448             } => {
3449                 let (op, size) = op_name_size(alu_op);
3450                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3451                 let rn = show_ireg_sized(rn, mb_rru, size);
3452                 let immshift = immshift.show_rru(mb_rru);
3453                 format!("{} {}, {}, {}", op, rd, rn, immshift)
3454             }
3455             &Inst::AluRRRShift {
3456                 alu_op,
3457                 rd,
3458                 rn,
3459                 rm,
3460                 ref shiftop,
3461             } => {
3462                 let (op, size) = op_name_size(alu_op);
3463                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3464                 let rn = show_ireg_sized(rn, mb_rru, size);
3465                 let rm = show_ireg_sized(rm, mb_rru, size);
3466                 let shiftop = shiftop.show_rru(mb_rru);
3467                 format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop)
3468             }
3469             &Inst::AluRRRExtend {
3470                 alu_op,
3471                 rd,
3472                 rn,
3473                 rm,
3474                 ref extendop,
3475             } => {
3476                 let (op, size) = op_name_size(alu_op);
3477                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3478                 let rn = show_ireg_sized(rn, mb_rru, size);
3479                 let rm = show_ireg_sized(rm, mb_rru, size);
3480                 let extendop = extendop.show_rru(mb_rru);
3481                 format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop)
3482             }
3483             &Inst::BitRR { op, rd, rn } => {
3484                 let size = op.operand_size();
3485                 let op = op.op_str();
3486                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3487                 let rn = show_ireg_sized(rn, mb_rru, size);
3488                 format!("{} {}, {}", op, rd, rn)
3489             }
3490             &Inst::ULoad8 { rd, ref mem, .. }
3491             | &Inst::SLoad8 { rd, ref mem, .. }
3492             | &Inst::ULoad16 { rd, ref mem, .. }
3493             | &Inst::SLoad16 { rd, ref mem, .. }
3494             | &Inst::ULoad32 { rd, ref mem, .. }
3495             | &Inst::SLoad32 { rd, ref mem, .. }
3496             | &Inst::ULoad64 { rd, ref mem, .. } => {
3497                 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3498 
3499                 let is_unscaled = match &mem {
3500                     &AMode::Unscaled(..) => true,
3501                     _ => false,
3502                 };
3503                 let (op, size) = match (self, is_unscaled) {
3504                     (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
3505                     (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
3506                     (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
3507                     (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
3508                     (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
3509                     (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
3510                     (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
3511                     (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
3512                     (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
3513                     (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
3514                     (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
3515                     (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
3516                     (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
3517                     (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
3518                     _ => unreachable!(),
3519                 };
3520                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3521                 let mem = mem.show_rru(mb_rru);
3522                 format!("{}{} {}, {}", mem_str, op, rd, mem)
3523             }
3524             &Inst::Store8 { rd, ref mem, .. }
3525             | &Inst::Store16 { rd, ref mem, .. }
3526             | &Inst::Store32 { rd, ref mem, .. }
3527             | &Inst::Store64 { rd, ref mem, .. } => {
3528                 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3529 
3530                 let is_unscaled = match &mem {
3531                     &AMode::Unscaled(..) => true,
3532                     _ => false,
3533                 };
3534                 let (op, size) = match (self, is_unscaled) {
3535                     (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
3536                     (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
3537                     (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
3538                     (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
3539                     (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
3540                     (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
3541                     (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
3542                     (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
3543                     _ => unreachable!(),
3544                 };
3545                 let rd = show_ireg_sized(rd, mb_rru, size);
3546                 let mem = mem.show_rru(mb_rru);
3547                 format!("{}{} {}, {}", mem_str, op, rd, mem)
3548             }
3549             &Inst::StoreP64 {
3550                 rt, rt2, ref mem, ..
3551             } => {
3552                 let rt = rt.show_rru(mb_rru);
3553                 let rt2 = rt2.show_rru(mb_rru);
3554                 let mem = mem.show_rru(mb_rru);
3555                 format!("stp {}, {}, {}", rt, rt2, mem)
3556             }
3557             &Inst::LoadP64 {
3558                 rt, rt2, ref mem, ..
3559             } => {
3560                 let rt = rt.to_reg().show_rru(mb_rru);
3561                 let rt2 = rt2.to_reg().show_rru(mb_rru);
3562                 let mem = mem.show_rru(mb_rru);
3563                 format!("ldp {}, {}, {}", rt, rt2, mem)
3564             }
3565             &Inst::Mov64 { rd, rm } => {
3566                 let rd = rd.to_reg().show_rru(mb_rru);
3567                 let rm = rm.show_rru(mb_rru);
3568                 format!("mov {}, {}", rd, rm)
3569             }
3570             &Inst::Mov32 { rd, rm } => {
3571                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
3572                 let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32);
3573                 format!("mov {}, {}", rd, rm)
3574             }
3575             &Inst::MovZ { rd, ref imm, size } => {
3576                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3577                 let imm = imm.show_rru(mb_rru);
3578                 format!("movz {}, {}", rd, imm)
3579             }
3580             &Inst::MovN { rd, ref imm, size } => {
3581                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3582                 let imm = imm.show_rru(mb_rru);
3583                 format!("movn {}, {}", rd, imm)
3584             }
3585             &Inst::MovK { rd, ref imm, size } => {
3586                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3587                 let imm = imm.show_rru(mb_rru);
3588                 format!("movk {}, {}", rd, imm)
3589             }
            // Conditional select/set instructions; `cond` prints as a
            // condition-code mnemonic via its own show_rru impl.
            &Inst::CSel { rd, rn, rm, cond } => {
                let rd = rd.to_reg().show_rru(mb_rru);
                let rn = rn.show_rru(mb_rru);
                let rm = rm.show_rru(mb_rru);
                let cond = cond.show_rru(mb_rru);
                format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
            }
            &Inst::CSet { rd, cond } => {
                let rd = rd.to_reg().show_rru(mb_rru);
                let cond = cond.show_rru(mb_rru);
                format!("cset {}, {}", rd, cond)
            }
            &Inst::CSetm { rd, cond } => {
                let rd = rd.to_reg().show_rru(mb_rru);
                let cond = cond.show_rru(mb_rru);
                format!("csetm {}, {}", rd, cond)
            }
            &Inst::CCmpImm {
                size,
                rn,
                imm,
                nzcv,
                cond,
            } => {
                // Conditional compare with immediate: operand register at the
                // requested size, then immediate, NZCV flags, and condition.
                let rn = show_ireg_sized(rn, mb_rru, size);
                let imm = imm.show_rru(mb_rru);
                let nzcv = nzcv.show_rru(mb_rru);
                let cond = cond.show_rru(mb_rru);
                format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
            }
            // Atomic pseudo-instructions. AtomicRMW/AtomicCASLoop print a
            // textual description of their multi-instruction expansion, which
            // uses the fixed registers x24-x28 named in the strings below.
            &Inst::AtomicRMW { ty, op, .. } => {
                // NOTE(review): the printed text contains an unbalanced ')'
                // after "[x25]" — looks like a typo, but this string may be
                // pinned by golden pretty-print tests; confirm before fixing.
                format!(
                    "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
                    ty.bits(), op)
            }
            &Inst::AtomicCAS { rs, rt, rn, ty } => {
                // Single-instruction compare-and-swap: the mnemonic encodes
                // the access width; sub-word forms get the b/h suffix.
                let op = match ty {
                    I8 => "casalb",
                    I16 => "casalh",
                    I32 | I64 => "casal",
                    _ => panic!("Unsupported type: {}", ty),
                };
                let size = OperandSize::from_ty(ty);
                let rs = show_ireg_sized(rs.to_reg(), mb_rru, size);
                let rt = show_ireg_sized(rt, mb_rru, size);
                let rn = rn.show_rru(mb_rru);

                format!("{} {}, {}, [{}]", op, rs, rt, rn)
            }
            &Inst::AtomicCASLoop { ty } => {
                format!(
                    "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
                    ty.bits())
            }
            &Inst::LoadAcquire {
                access_ty, rt, rn, ..
            } => {
                // Sub-64-bit acquiring loads print a 32-bit destination reg.
                let (op, ty) = match access_ty {
                    I8 => ("ldarb", I32),
                    I16 => ("ldarh", I32),
                    I32 => ("ldar", I32),
                    I64 => ("ldar", I64),
                    _ => panic!("Unsupported type: {}", access_ty),
                };
                let size = OperandSize::from_ty(ty);
                let rt = show_ireg_sized(rt.to_reg(), mb_rru, size);
                let rn = rn.show_rru(mb_rru);
                format!("{} {}, [{}]", op, rt, rn)
            }
            &Inst::StoreRelease {
                access_ty, rt, rn, ..
            } => {
                // Mirror of LoadAcquire for the store-release forms.
                let (op, ty) = match access_ty {
                    I8 => ("stlrb", I32),
                    I16 => ("stlrh", I32),
                    I32 => ("stlr", I32),
                    I64 => ("stlr", I64),
                    _ => panic!("Unsupported type: {}", access_ty),
                };
                let size = OperandSize::from_ty(ty);
                let rt = show_ireg_sized(rt, mb_rru, size);
                let rn = rn.show_rru(mb_rru);
                format!("{} {}, [{}]", op, rt, rn)
            }
            &Inst::Fence {} => {
                // Data memory barrier, inner-shareable domain.
                format!("dmb ish")
            }
            // FP/vector register moves.
            &Inst::FpuMove64 { rd, rn } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
                let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
                format!("fmov {}, {}", rd, rn)
            }
            &Inst::FpuMove128 { rd, rn } => {
                // A full 128-bit move prints as a 16-lane byte-vector mov.
                let rd = rd.to_reg().show_rru(mb_rru);
                let rn = rn.show_rru(mb_rru);
                format!("mov {}.16b, {}.16b", rd, rn)
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                // Destination is a scalar of the vector's lane size; source
                // is one indexed element of the vector.
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
                let rn = show_vreg_element(rn, mb_rru, idx, size);
                format!("mov {}, {}", rd, rn)
            }
            &Inst::FpuExtend { rd, rn, size } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
                let rn = show_vreg_scalar(rn, mb_rru, size);

                format!("fmov {}, {}", rd, rn)
            }
            // One-operand FP ops. Source and destination sizes differ only
            // for the fcvt (precision-conversion) variants.
            &Inst::FpuRR { fpu_op, rd, rn } => {
                let (op, sizesrc, sizedest) = match fpu_op {
                    FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),
                    FPUOp1::Abs64 => ("fabs", ScalarSize::Size64, ScalarSize::Size64),
                    FPUOp1::Neg32 => ("fneg", ScalarSize::Size32, ScalarSize::Size32),
                    FPUOp1::Neg64 => ("fneg", ScalarSize::Size64, ScalarSize::Size64),
                    FPUOp1::Sqrt32 => ("fsqrt", ScalarSize::Size32, ScalarSize::Size32),
                    FPUOp1::Sqrt64 => ("fsqrt", ScalarSize::Size64, ScalarSize::Size64),
                    FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64),
                    FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32),
                };
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
                let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
                format!("{} {}, {}", op, rd, rn)
            }
            // Two-operand FP ops; all three registers print at one size.
            &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
                let (op, size) = match fpu_op {
                    FPUOp2::Add32 => ("fadd", ScalarSize::Size32),
                    FPUOp2::Add64 => ("fadd", ScalarSize::Size64),
                    FPUOp2::Sub32 => ("fsub", ScalarSize::Size32),
                    FPUOp2::Sub64 => ("fsub", ScalarSize::Size64),
                    FPUOp2::Mul32 => ("fmul", ScalarSize::Size32),
                    FPUOp2::Mul64 => ("fmul", ScalarSize::Size64),
                    FPUOp2::Div32 => ("fdiv", ScalarSize::Size32),
                    FPUOp2::Div64 => ("fdiv", ScalarSize::Size64),
                    FPUOp2::Max32 => ("fmax", ScalarSize::Size32),
                    FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
                    FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
                    FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
                    // Saturating integer add/sub on the 64-bit scalar regs
                    // also live in FPUOp2 since they use the vector bank.
                    FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
                    FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
                    FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
                    FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
                };
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
                let rn = show_vreg_scalar(rn, mb_rru, size);
                let rm = show_vreg_scalar(rm, mb_rru, size);
                format!("{} {}, {}, {}", op, rd, rn, rm)
            }
            &Inst::FpuRRI { fpu_op, rd, rn } => {
                // 32-bit shift-by-immediate variants print as a 2x32 vector;
                // 64-bit variants print as a 64-bit scalar.
                let (op, imm, vector) = match fpu_op {
                    FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true),
                    FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false),
                    FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true),
                    FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false),
                };

                // Select the register formatter once, then apply it to both
                // operands so they always agree in style.
                let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
                    |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
                } else {
                    |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
                };
                let rd = show_vreg_fn(rd.to_reg(), mb_rru);
                let rn = show_vreg_fn(rn, mb_rru);
                format!("{} {}, {}, {}", op, rd, rn, imm)
            }
            // Fused multiply-add (rd = rn * rm + ra) and FP compares.
            &Inst::FpuRRRR {
                fpu_op,
                rd,
                rn,
                rm,
                ra,
            } => {
                let (op, size) = match fpu_op {
                    FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32),
                    FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64),
                };
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
                let rn = show_vreg_scalar(rn, mb_rru, size);
                let rm = show_vreg_scalar(rm, mb_rru, size);
                let ra = show_vreg_scalar(ra, mb_rru, size);
                format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
            }
            &Inst::FpuCmp32 { rn, rm } => {
                let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
                let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
                format!("fcmp {}, {}", rn, rm)
            }
            &Inst::FpuCmp64 { rn, rm } => {
                let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
                let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
                format!("fcmp {}, {}", rn, rm)
            }
            // FP loads/stores. mem_finalize_for_show may emit prefix
            // instructions (mem_str) needed to materialize the final
            // addressing mode; they are printed before the ldr/str itself.
            &Inst::FpuLoad32 { rd, ref mem, .. } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
                let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
                let mem = mem.show_rru(mb_rru);
                format!("{}ldr {}, {}", mem_str, rd, mem)
            }
            &Inst::FpuLoad64 { rd, ref mem, .. } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
                let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
                let mem = mem.show_rru(mb_rru);
                format!("{}ldr {}, {}", mem_str, rd, mem)
            }
            &Inst::FpuLoad128 { rd, ref mem, .. } => {
                let rd = rd.to_reg().show_rru(mb_rru);
                // Swap the printed register-name prefix for "q" to denote a
                // full 128-bit access (e.g. "v3" -> "q3").
                let rd = "q".to_string() + &rd[1..];
                let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
                let mem = mem.show_rru(mb_rru);
                format!("{}ldr {}, {}", mem_str, rd, mem)
            }
            &Inst::FpuStore32 { rd, ref mem, .. } => {
                let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32);
                let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
                let mem = mem.show_rru(mb_rru);
                format!("{}str {}, {}", mem_str, rd, mem)
            }
            &Inst::FpuStore64 { rd, ref mem, .. } => {
                let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64);
                let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
                let mem = mem.show_rru(mb_rru);
                format!("{}str {}, {}", mem_str, rd, mem)
            }
            &Inst::FpuStore128 { rd, ref mem, .. } => {
                let rd = rd.show_rru(mb_rru);
                // Same "q"-prefix trick as FpuLoad128 above.
                let rd = "q".to_string() + &rd[1..];
                let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
                let mem = mem.show_rru(mb_rru);
                format!("{}str {}, {}", mem_str, rd, mem)
            }
            // FP load/store pair: these print the pair amode directly,
            // without mem_finalize_for_show.
            &Inst::FpuLoadP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64);
                let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64);
                let mem = mem.show_rru(mb_rru);

                format!("ldp {}, {}, {}", rt, rt2, mem)
            }
            &Inst::FpuStoreP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64);
                let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64);
                let mem = mem.show_rru(mb_rru);

                format!("stp {}, {}, {}", rt, rt2, mem)
            }
            &Inst::FpuLoadP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128);
                let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128);
                let mem = mem.show_rru(mb_rru);

                format!("ldp {}, {}, {}", rt, rt2, mem)
            }
            &Inst::FpuStoreP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128);
                let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128);
                let mem = mem.show_rru(mb_rru);

                format!("stp {}, {}, {}", rt, rt2, mem)
            }
            // FP constant loads: printed as a pc-relative load over an
            // inline literal that a branch skips (matching the emitted form).
            &Inst::LoadFpuConst64 { rd, const_data } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
                format!(
                    "ldr {}, pc+8 ; b 12 ; data.f64 {}",
                    rd,
                    f64::from_bits(const_data)
                )
            }
            &Inst::LoadFpuConst128 { rd, const_data } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
                format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data)
            }
            // FP -> int conversions: fcvtzs for signed, fcvtzu for unsigned
            // destinations; source is an FP scalar, destination an int reg.
            &Inst::FpuToInt { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
                    FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
                };
                let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest);
                let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
                format!("{} {}, {}", op, rd, rn)
            }
            // Int -> FP conversions: scvtf for signed, ucvtf for unsigned
            // sources; the size roles are the mirror of FpuToInt.
            &Inst::IntToFpu { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
                    IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
                };
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
                let rn = show_ireg_sized(rn, mb_rru, sizesrc);
                format!("{} {}, {}", op, rd, rn)
            }
            // FP conditional selects and rounding.
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
                let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
                let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
                let cond = cond.show_rru(mb_rru);
                format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
                let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
                let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
                let cond = cond.show_rru(mb_rru);
                format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
            }
            // Round-to-integral-value: mnemonic suffix selects the rounding
            // mode (m = toward -inf, p = toward +inf, z = toward zero,
            // n = to nearest).
            &Inst::FpuRound { op, rd, rn } => {
                let (inst, size) = match op {
                    FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
                    FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
                    FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
                    FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
                    FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
                    FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
                    FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
                    FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
                };
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
                let rn = show_vreg_scalar(rn, mb_rru, size);
                format!("{} {}, {}", inst, rd, rn)
            }
            // Int reg -> FP/vector reg bit move.
            &Inst::MovToFpu { rd, rn, size } => {
                let operand_size = size.operand_size();
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
                let rn = show_ireg_sized(rn, mb_rru, operand_size);
                format!("fmov {}, {}", rd, rn)
            }
            // Moves between integer registers and vector lanes.
            &Inst::MovToVec { rd, rn, idx, size } => {
                // Destination is a single indexed lane of the vector.
                let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
                let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
                format!("mov {}, {}", rd, rn)
            }
            &Inst::MovFromVec { rd, rn, idx, size } => {
                // Narrow lanes need the zero-extending umov; 32/64-bit lanes
                // use the plain mov alias.
                let op = match size {
                    VectorSize::Size8x16 => "umov",
                    VectorSize::Size16x8 => "umov",
                    VectorSize::Size32x4 => "mov",
                    VectorSize::Size64x2 => "mov",
                    _ => unimplemented!(),
                };
                let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
                let rn = show_vreg_element(rn, mb_rru, idx, size);
                format!("{} {}, {}", op, rd, rn)
            }
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                // Sign-extending lane extract; scalar_size picks the int
                // destination width independently of the lane size.
                let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size);
                let rn = show_vreg_element(rn, mb_rru, idx, size);
                format!("smov {}, {}", rd, rn)
            }
            // Lane-broadcast (dup) and vector immediate-materialization arms.
            &Inst::VecDup { rd, rn, size } => {
                // Broadcast from an integer register to all lanes.
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
                let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
                format!("dup {}, {}", rd, rn)
            }
            &Inst::VecDupFromFpu { rd, rn, size } => {
                // Broadcast from lane 0 of a vector register.
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
                let rn = show_vreg_element(rn, mb_rru, 0, size);
                format!("dup {}, {}", rd, rn)
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.show_rru(mb_rru);
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);

                format!("fmov {}, {}", rd, imm)
            }
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let imm = imm.show_rru(mb_rru);
                // mvni moves the bitwise-inverted immediate; movi the plain one.
                let op = if invert { "mvni" } else { "movi" };
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);

                format!("{} {}, {}", op, rd, imm)
            }
            // Vector widening (sxtl/uxtl): the "2" suffix forms read the high
            // half of the 128-bit source, so their source arrangement is the
            // full-width one; the non-"2" forms read the low 64 bits.
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
            } => {
                let (op, dest, src) = match (t, high_half) {
                    (VecExtendOp::Sxtl8, false) => {
                        ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8)
                    }
                    (VecExtendOp::Sxtl8, true) => {
                        ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    (VecExtendOp::Sxtl16, false) => {
                        ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4)
                    }
                    (VecExtendOp::Sxtl16, true) => {
                        ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    (VecExtendOp::Sxtl32, false) => {
                        ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2)
                    }
                    (VecExtendOp::Sxtl32, true) => {
                        ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
                    }
                    (VecExtendOp::Uxtl8, false) => {
                        ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8)
                    }
                    (VecExtendOp::Uxtl8, true) => {
                        ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    (VecExtendOp::Uxtl16, false) => {
                        ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4)
                    }
                    (VecExtendOp::Uxtl16, true) => {
                        ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    (VecExtendOp::Uxtl32, false) => {
                        ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2)
                    }
                    (VecExtendOp::Uxtl32, true) => {
                        ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
                    }
                };
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
                let rn = show_vreg_vector(rn, mb_rru, src);
                format!("{} {}, {}", op, rd, rn)
            }
            &Inst::VecMovElement {
                rd,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                // Lane-to-lane move; both operands print as indexed elements.
                let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size);
                let rn = show_vreg_element(rn, mb_rru, src_idx, size);
                format!("mov {}, {}", rd, rn)
            }
            // Widening one-operand vector ops (fcvtl/shll). As with
            // VecExtend, the "2" suffix reads the high half of the source.
            // `suffix` carries the extra shift-amount text that the shll
            // forms append after the source register.
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (op, rd_size, size, suffix) = match (op, high_half) {
                    (VecRRLongOp::Fcvtl16, false) => {
                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
                    }
                    (VecRRLongOp::Fcvtl16, true) => {
                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
                    }
                    (VecRRLongOp::Fcvtl32, false) => {
                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
                    }
                    (VecRRLongOp::Fcvtl32, true) => {
                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
                    }
                    (VecRRLongOp::Shll8, false) => {
                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
                    }
                    (VecRRLongOp::Shll8, true) => {
                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
                    }
                    (VecRRLongOp::Shll16, false) => {
                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
                    }
                    (VecRRLongOp::Shll16, true) => {
                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
                    }
                    (VecRRLongOp::Shll32, false) => {
                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
                    }
                    (VecRRLongOp::Shll32, true) => {
                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
                    }
                };
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
                let rn = show_vreg_vector(rn, mb_rru, size);

                format!("{} {}, {}{}", op, rd, rn, suffix)
            }
            // Narrowing one-operand vector ops (xtn / sqxtn / sqxtun /
            // uqxtn / fcvtn). Here the "2" suffix WRITES the high half of
            // the destination, so the destination arrangement is the
            // full-width one for the high_half=true rows.
            &Inst::VecRRNarrow {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (op, rd_size, size) = match (op, high_half) {
                    (VecRRNarrowOp::Xtn16, false) => {
                        ("xtn", VectorSize::Size8x8, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Xtn16, true) => {
                        ("xtn2", VectorSize::Size8x16, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Xtn32, false) => {
                        ("xtn", VectorSize::Size16x4, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Xtn32, true) => {
                        ("xtn2", VectorSize::Size16x8, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Xtn64, false) => {
                        ("xtn", VectorSize::Size32x2, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Xtn64, true) => {
                        ("xtn2", VectorSize::Size32x4, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Sqxtn16, false) => {
                        ("sqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Sqxtn16, true) => {
                        ("sqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Sqxtn32, false) => {
                        ("sqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Sqxtn32, true) => {
                        ("sqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Sqxtn64, false) => {
                        ("sqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Sqxtn64, true) => {
                        ("sqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Sqxtun16, false) => {
                        ("sqxtun", VectorSize::Size8x8, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Sqxtun16, true) => {
                        ("sqxtun2", VectorSize::Size8x16, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Sqxtun32, false) => {
                        ("sqxtun", VectorSize::Size16x4, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Sqxtun32, true) => {
                        ("sqxtun2", VectorSize::Size16x8, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Sqxtun64, false) => {
                        ("sqxtun", VectorSize::Size32x2, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Sqxtun64, true) => {
                        ("sqxtun2", VectorSize::Size32x4, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Uqxtn16, false) => {
                        ("uqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Uqxtn16, true) => {
                        ("uqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
                    }
                    (VecRRNarrowOp::Uqxtn32, false) => {
                        ("uqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Uqxtn32, true) => {
                        ("uqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Uqxtn64, false) => {
                        ("uqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Uqxtn64, true) => {
                        ("uqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Fcvtn32, false) => {
                        ("fcvtn", VectorSize::Size16x4, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Fcvtn32, true) => {
                        ("fcvtn2", VectorSize::Size16x8, VectorSize::Size32x4)
                    }
                    (VecRRNarrowOp::Fcvtn64, false) => {
                        ("fcvtn", VectorSize::Size32x2, VectorSize::Size64x2)
                    }
                    (VecRRNarrowOp::Fcvtn64, true) => {
                        ("fcvtn2", VectorSize::Size32x4, VectorSize::Size64x2)
                    }
                };
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
                let rn = show_vreg_vector(rn, mb_rru, size);

                format!("{} {}, {}", op, rd, rn)
            }
            // Pairwise reductions.
            &Inst::VecRRPair { op, rd, rn } => {
                let op = match op {
                    VecPairOp::Addp => "addp",
                };
                // Destination is a 64-bit scalar; source is a 2x64 vector.
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
                let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2);

                format!("{} {}, {}", op, rd, rn)
            }
            // Pairwise widening adds: each destination lane is twice the
            // width of the pair of source lanes it sums (s = signed,
            // u = unsigned).
            &Inst::VecRRPairLong { op, rd, rn } => {
                let (op, dest, src) = match op {
                    VecRRPairLongOp::Saddlp8 => {
                        ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Saddlp16 => {
                        ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    VecRRPairLongOp::Uaddlp8 => {
                        ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Uaddlp16 => {
                        ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                };
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
                let rn = show_vreg_vector(rn, mb_rru, src);

                format!("{} {}, {}", op, rd, rn)
            }
4216             &Inst::VecRRR {
4217                 rd,
4218                 rn,
4219                 rm,
4220                 alu_op,
4221                 size,
4222             } => {
4223                 let (op, size) = match alu_op {
4224                     VecALUOp::Sqadd => ("sqadd", size),
4225                     VecALUOp::Uqadd => ("uqadd", size),
4226                     VecALUOp::Sqsub => ("sqsub", size),
4227                     VecALUOp::Uqsub => ("uqsub", size),
4228                     VecALUOp::Cmeq => ("cmeq", size),
4229                     VecALUOp::Cmge => ("cmge", size),
4230                     VecALUOp::Cmgt => ("cmgt", size),
4231                     VecALUOp::Cmhs => ("cmhs", size),
4232                     VecALUOp::Cmhi => ("cmhi", size),
4233                     VecALUOp::Fcmeq => ("fcmeq", size),
4234                     VecALUOp::Fcmgt => ("fcmgt", size),
4235                     VecALUOp::Fcmge => ("fcmge", size),
4236                     VecALUOp::And => ("and", VectorSize::Size8x16),
4237                     VecALUOp::Bic => ("bic", VectorSize::Size8x16),
4238                     VecALUOp::Orr => ("orr", VectorSize::Size8x16),
4239                     VecALUOp::Eor => ("eor", VectorSize::Size8x16),
4240                     VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
4241                     VecALUOp::Umaxp => ("umaxp", size),
4242                     VecALUOp::Add => ("add", size),
4243                     VecALUOp::Sub => ("sub", size),
4244                     VecALUOp::Mul => ("mul", size),
4245                     VecALUOp::Sshl => ("sshl", size),
4246                     VecALUOp::Ushl => ("ushl", size),
4247                     VecALUOp::Umin => ("umin", size),
4248                     VecALUOp::Smin => ("smin", size),
4249                     VecALUOp::Umax => ("umax", size),
4250                     VecALUOp::Smax => ("smax", size),
4251                     VecALUOp::Urhadd => ("urhadd", size),
4252                     VecALUOp::Fadd => ("fadd", size),
4253                     VecALUOp::Fsub => ("fsub", size),
4254                     VecALUOp::Fdiv => ("fdiv", size),
4255                     VecALUOp::Fmax => ("fmax", size),
4256                     VecALUOp::Fmin => ("fmin", size),
4257                     VecALUOp::Fmul => ("fmul", size),
4258                     VecALUOp::Addp => ("addp", size),
4259                     VecALUOp::Zip1 => ("zip1", size),
4260                     VecALUOp::Sqrdmulh => ("sqrdmulh", size),
4261                 };
4262                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4263                 let rn = show_vreg_vector(rn, mb_rru, size);
4264                 let rm = show_vreg_vector(rm, mb_rru, size);
4265                 format!("{} {}, {}, {}", op, rd, rn, rm)
4266             }
4267             &Inst::VecRRRLong {
4268                 rd,
4269                 rn,
4270                 rm,
4271                 alu_op,
4272                 high_half,
4273             } => {
4274                 let (op, dest_size, src_size) = match (alu_op, high_half) {
4275                     (VecRRRLongOp::Smull8, false) => {
4276                         ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
4277                     }
4278                     (VecRRRLongOp::Smull8, true) => {
4279                         ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
4280                     }
4281                     (VecRRRLongOp::Smull16, false) => {
4282                         ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
4283                     }
4284                     (VecRRRLongOp::Smull16, true) => {
4285                         ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
4286                     }
4287                     (VecRRRLongOp::Smull32, false) => {
4288                         ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
4289                     }
4290                     (VecRRRLongOp::Smull32, true) => {
4291                         ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
4292                     }
4293                     (VecRRRLongOp::Umull8, false) => {
4294                         ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
4295                     }
4296                     (VecRRRLongOp::Umull8, true) => {
4297                         ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
4298                     }
4299                     (VecRRRLongOp::Umull16, false) => {
4300                         ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
4301                     }
4302                     (VecRRRLongOp::Umull16, true) => {
4303                         ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
4304                     }
4305                     (VecRRRLongOp::Umull32, false) => {
4306                         ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
4307                     }
4308                     (VecRRRLongOp::Umull32, true) => {
4309                         ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
4310                     }
4311                     (VecRRRLongOp::Umlal8, false) => {
4312                         ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
4313                     }
4314                     (VecRRRLongOp::Umlal8, true) => {
4315                         ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
4316                     }
4317                     (VecRRRLongOp::Umlal16, false) => {
4318                         ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
4319                     }
4320                     (VecRRRLongOp::Umlal16, true) => {
4321                         ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
4322                     }
4323                     (VecRRRLongOp::Umlal32, false) => {
4324                         ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
4325                     }
4326                     (VecRRRLongOp::Umlal32, true) => {
4327                         ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
4328                     }
4329                 };
4330                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
4331                 let rn = show_vreg_vector(rn, mb_rru, src_size);
4332                 let rm = show_vreg_vector(rm, mb_rru, src_size);
4333                 format!("{} {}, {}, {}", op, rd, rn, rm)
4334             }
4335             &Inst::VecMisc { op, rd, rn, size } => {
4336                 let (op, size, suffix) = match op {
4337                     VecMisc2::Not => (
4338                         "mvn",
4339                         if size.is_128bits() {
4340                             VectorSize::Size8x16
4341                         } else {
4342                             VectorSize::Size8x8
4343                         },
4344                         "",
4345                     ),
4346                     VecMisc2::Neg => ("neg", size, ""),
4347                     VecMisc2::Abs => ("abs", size, ""),
4348                     VecMisc2::Fabs => ("fabs", size, ""),
4349                     VecMisc2::Fneg => ("fneg", size, ""),
4350                     VecMisc2::Fsqrt => ("fsqrt", size, ""),
4351                     VecMisc2::Rev64 => ("rev64", size, ""),
4352                     VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
4353                     VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
4354                     VecMisc2::Scvtf => ("scvtf", size, ""),
4355                     VecMisc2::Ucvtf => ("ucvtf", size, ""),
4356                     VecMisc2::Frintn => ("frintn", size, ""),
4357                     VecMisc2::Frintz => ("frintz", size, ""),
4358                     VecMisc2::Frintm => ("frintm", size, ""),
4359                     VecMisc2::Frintp => ("frintp", size, ""),
4360                     VecMisc2::Cnt => ("cnt", size, ""),
4361                     VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
4362                 };
4363                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4364                 let rn = show_vreg_vector(rn, mb_rru, size);
4365                 format!("{} {}, {}{}", op, rd, rn, suffix)
4366             }
4367             &Inst::VecLanes { op, rd, rn, size } => {
4368                 let op = match op {
4369                     VecLanesOp::Uminv => "uminv",
4370                     VecLanesOp::Addv => "addv",
4371                 };
4372                 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
4373                 let rn = show_vreg_vector(rn, mb_rru, size);
4374                 format!("{} {}, {}", op, rd, rn)
4375             }
4376             &Inst::VecShiftImm {
4377                 op,
4378                 rd,
4379                 rn,
4380                 size,
4381                 imm,
4382             } => {
4383                 let op = match op {
4384                     VecShiftImmOp::Shl => "shl",
4385                     VecShiftImmOp::Ushr => "ushr",
4386                     VecShiftImmOp::Sshr => "sshr",
4387                 };
4388                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4389                 let rn = show_vreg_vector(rn, mb_rru, size);
4390                 format!("{} {}, {}, #{}", op, rd, rn, imm)
4391             }
4392             &Inst::VecExtract { rd, rn, rm, imm4 } => {
4393                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4394                 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4395                 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4396                 format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
4397             }
4398             &Inst::VecTbl {
4399                 rd,
4400                 rn,
4401                 rm,
4402                 is_extension,
4403             } => {
4404                 let op = if is_extension { "tbx" } else { "tbl" };
4405                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4406                 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4407                 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4408                 format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
4409             }
4410             &Inst::VecTbl2 {
4411                 rd,
4412                 rn,
4413                 rn2,
4414                 rm,
4415                 is_extension,
4416             } => {
4417                 let op = if is_extension { "tbx" } else { "tbl" };
4418                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4419                 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4420                 let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16);
4421                 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4422                 format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
4423             }
4424             &Inst::VecLoadReplicate { rd, rn, size, .. } => {
4425                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4426                 let rn = rn.show_rru(mb_rru);
4427 
4428                 format!("ld1r {{ {} }}, [{}]", rd, rn)
4429             }
4430             &Inst::VecCSel { rd, rn, rm, cond } => {
4431                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4432                 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4433                 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4434                 let cond = cond.show_rru(mb_rru);
4435                 format!(
4436                     "vcsel {}, {}, {}, {} (if-then-else diamond)",
4437                     rd, rn, rm, cond
4438                 )
4439             }
4440             &Inst::MovToNZCV { rn } => {
4441                 let rn = rn.show_rru(mb_rru);
4442                 format!("msr nzcv, {}", rn)
4443             }
4444             &Inst::MovFromNZCV { rd } => {
4445                 let rd = rd.to_reg().show_rru(mb_rru);
4446                 format!("mrs {}, nzcv", rd)
4447             }
4448             &Inst::Extend {
4449                 rd,
4450                 rn,
4451                 signed: false,
4452                 from_bits: 1,
4453                 ..
4454             } => {
4455                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
4456                 let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
4457                 format!("and {}, {}, #1", rd, rn)
4458             }
4459             &Inst::Extend {
4460                 rd,
4461                 rn,
4462                 signed: false,
4463                 from_bits: 32,
4464                 to_bits: 64,
4465             } => {
4466                 // The case of a zero extension from 32 to 64 bits, is implemented
4467                 // with a "mov" to a 32-bit (W-reg) dest, because this zeroes
4468                 // the top 32 bits.
4469                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
4470                 let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
4471                 format!("mov {}, {}", rd, rn)
4472             }
4473             &Inst::Extend {
4474                 rd,
4475                 rn,
4476                 signed,
4477                 from_bits,
4478                 to_bits,
4479             } => {
4480                 assert!(from_bits <= to_bits);
4481                 let op = match (signed, from_bits) {
4482                     (false, 8) => "uxtb",
4483                     (true, 8) => "sxtb",
4484                     (false, 16) => "uxth",
4485                     (true, 16) => "sxth",
4486                     (true, 32) => "sxtw",
4487                     (true, _) => "sbfx",
4488                     (false, _) => "ubfx",
4489                 };
4490                 if op == "sbfx" || op == "ubfx" {
4491                     let dest_size = OperandSize::from_bits(to_bits);
4492                     let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
4493                     let rn = show_ireg_sized(rn, mb_rru, dest_size);
4494                     format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits)
4495                 } else {
4496                     let dest_size = if signed {
4497                         OperandSize::from_bits(to_bits)
4498                     } else {
4499                         OperandSize::Size32
4500                     };
4501                     let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
4502                     let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
4503                     format!("{} {}, {}", op, rd, rn)
4504                 }
4505             }
4506             &Inst::Call { .. } => format!("bl 0"),
4507             &Inst::CallInd { ref info, .. } => {
4508                 let rn = info.rn.show_rru(mb_rru);
4509                 format!("blr {}", rn)
4510             }
4511             &Inst::Ret => "ret".to_string(),
4512             &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
4513             &Inst::Jump { ref dest } => {
4514                 let dest = dest.show_rru(mb_rru);
4515                 format!("b {}", dest)
4516             }
4517             &Inst::CondBr {
4518                 ref taken,
4519                 ref not_taken,
4520                 ref kind,
4521             } => {
4522                 let taken = taken.show_rru(mb_rru);
4523                 let not_taken = not_taken.show_rru(mb_rru);
4524                 match kind {
4525                     &CondBrKind::Zero(reg) => {
4526                         let reg = reg.show_rru(mb_rru);
4527                         format!("cbz {}, {} ; b {}", reg, taken, not_taken)
4528                     }
4529                     &CondBrKind::NotZero(reg) => {
4530                         let reg = reg.show_rru(mb_rru);
4531                         format!("cbnz {}, {} ; b {}", reg, taken, not_taken)
4532                     }
4533                     &CondBrKind::Cond(c) => {
4534                         let c = c.show_rru(mb_rru);
4535                         format!("b.{} {} ; b {}", c, taken, not_taken)
4536                     }
4537                 }
4538             }
4539             &Inst::IndirectBr { rn, .. } => {
4540                 let rn = rn.show_rru(mb_rru);
4541                 format!("br {}", rn)
4542             }
4543             &Inst::Brk => "brk #0".to_string(),
4544             &Inst::Udf { .. } => "udf".to_string(),
4545             &Inst::TrapIf { ref kind, .. } => match kind {
4546                 &CondBrKind::Zero(reg) => {
4547                     let reg = reg.show_rru(mb_rru);
4548                     format!("cbnz {}, 8 ; udf", reg)
4549                 }
4550                 &CondBrKind::NotZero(reg) => {
4551                     let reg = reg.show_rru(mb_rru);
4552                     format!("cbz {}, 8 ; udf", reg)
4553                 }
4554                 &CondBrKind::Cond(c) => {
4555                     let c = c.invert().show_rru(mb_rru);
4556                     format!("b.{} 8 ; udf", c)
4557                 }
4558             },
4559             &Inst::Adr { rd, off } => {
4560                 let rd = rd.show_rru(mb_rru);
4561                 format!("adr {}, pc+{}", rd, off)
4562             }
4563             &Inst::Word4 { data } => format!("data.i32 {}", data),
4564             &Inst::Word8 { data } => format!("data.i64 {}", data),
4565             &Inst::JTSequence {
4566                 ref info,
4567                 ridx,
4568                 rtmp1,
4569                 rtmp2,
4570                 ..
4571             } => {
4572                 let ridx = ridx.show_rru(mb_rru);
4573                 let rtmp1 = rtmp1.show_rru(mb_rru);
4574                 let rtmp2 = rtmp2.show_rru(mb_rru);
4575                 let default_target = info.default_target.show_rru(mb_rru);
4576                 format!(
4577                     concat!(
4578                         "b.hs {} ; ",
4579                         "adr {}, pc+16 ; ",
4580                         "ldrsw {}, [{}, {}, LSL 2] ; ",
4581                         "add {}, {}, {} ; ",
4582                         "br {} ; ",
4583                         "jt_entries {:?}"
4584                     ),
4585                     default_target,
4586                     rtmp1,
4587                     rtmp2,
4588                     rtmp1,
4589                     ridx,
4590                     rtmp1,
4591                     rtmp1,
4592                     rtmp2,
4593                     rtmp1,
4594                     info.targets
4595                 )
4596             }
4597             &Inst::LoadExtName {
4598                 rd,
4599                 ref name,
4600                 offset,
4601             } => {
4602                 let rd = rd.show_rru(mb_rru);
4603                 format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset)
4604             }
4605             &Inst::LoadAddr { rd, ref mem } => {
4606                 // TODO: we really should find a better way to avoid duplication of
4607                 // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
4608                 // expansion stage (i.e., legalization, but without the slow edit-in-place
4609                 // of the existing legalization framework).
4610                 let (mem_insts, mem) = mem_finalize(0, mem, state);
4611                 let mut ret = String::new();
4612                 for inst in mem_insts.into_iter() {
4613                     ret.push_str(&inst.show_rru(mb_rru));
4614                 }
4615                 let (reg, index_reg, offset) = match mem {
4616                     AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
4617                     AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
4618                     AMode::UnsignedOffset(r, uimm12scaled) => {
4619                         (r, None, uimm12scaled.value() as i32)
4620                     }
4621                     _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
4622                 };
4623                 let abs_offset = if offset < 0 {
4624                     -offset as u64
4625                 } else {
4626                     offset as u64
4627                 };
4628                 let alu_op = if offset < 0 {
4629                     ALUOp::Sub64
4630                 } else {
4631                     ALUOp::Add64
4632                 };
4633 
4634                 if let Some((idx, extendop)) = index_reg {
4635                     let add = Inst::AluRRRExtend {
4636                         alu_op: ALUOp::Add64,
4637                         rd,
4638                         rn: reg,
4639                         rm: idx,
4640                         extendop,
4641                     };
4642 
4643                     ret.push_str(&add.show_rru(mb_rru));
4644                 } else if offset == 0 {
4645                     let mov = Inst::gen_move(rd, reg, I64);
4646                     ret.push_str(&mov.show_rru(mb_rru));
4647                 } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
4648                     let add = Inst::AluRRImm12 {
4649                         alu_op,
4650                         rd,
4651                         rn: reg,
4652                         imm12,
4653                     };
4654                     ret.push_str(&add.show_rru(mb_rru));
4655                 } else {
4656                     let tmp = writable_spilltmp_reg();
4657                     for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
4658                         ret.push_str(&inst.show_rru(mb_rru));
4659                     }
4660                     let add = Inst::AluRRR {
4661                         alu_op,
4662                         rd,
4663                         rn: reg,
4664                         rm: tmp.to_reg(),
4665                     };
4666                     ret.push_str(&add.show_rru(mb_rru));
4667                 }
4668                 ret
4669             }
4670             &Inst::VirtualSPOffsetAdj { offset } => {
4671                 state.virtual_sp_offset += offset;
4672                 format!("virtual_sp_offset_adjust {}", offset)
4673             }
4674             &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
4675 
4676             &Inst::ElfTlsGetAddr { ref symbol } => {
4677                 format!("elf_tls_get_addr {}", symbol)
4678             }
4679 
4680             &Inst::ValueLabelMarker { label, reg } => {
4681                 format!("value_label {:?}, {}", label, reg.show_rru(mb_rru))
4682             }
4683 
4684             &Inst::Unwind { ref inst } => {
4685                 format!("unwind {:?}", inst)
4686             }
4687         }
4688     }
4689 }
4690 
4691 //=============================================================================
4692 // Label fixups and jump veneers.
4693 
4694 /// Different forms of label references for different instruction formats.
4695 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
4696 pub enum LabelUse {
4697     /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
4698     /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
4699     Branch19,
4700     /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
4701     /// signed bits, in bits 25:0. Used by b, bl.
4702     Branch26,
4703     /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
4704     /// in bits 23:5.
4705     Ldr19,
4706     /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
4707     /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
4708     Adr21,
4709     /// 32-bit PC relative constant offset (from address of constant itself),
4710     /// signed. Used in jump tables.
4711     PCRel32,
4712 }
4713 
4714 impl MachInstLabelUse for LabelUse {
4715     /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
4716     const ALIGN: CodeOffset = 4;
4717 
4718     /// Maximum PC-relative range (positive), inclusive.
max_pos_range(self) -> CodeOffset4719     fn max_pos_range(self) -> CodeOffset {
4720         match self {
4721             // 19-bit immediate, left-shifted by 2, for 21 bits of total range. Signed, so +2^20
4722             // from zero. Likewise for two other shifted cases below.
4723             LabelUse::Branch19 => (1 << 20) - 1,
4724             LabelUse::Branch26 => (1 << 27) - 1,
4725             LabelUse::Ldr19 => (1 << 20) - 1,
4726             // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
4727             // range.
4728             LabelUse::Adr21 => (1 << 20) - 1,
4729             LabelUse::PCRel32 => 0x7fffffff,
4730         }
4731     }
4732 
4733     /// Maximum PC-relative range (negative).
max_neg_range(self) -> CodeOffset4734     fn max_neg_range(self) -> CodeOffset {
4735         // All forms are twos-complement signed offsets, so negative limit is one more than
4736         // positive limit.
4737         self.max_pos_range() + 1
4738     }
4739 
4740     /// Size of window into code needed to do the patch.
patch_size(self) -> CodeOffset4741     fn patch_size(self) -> CodeOffset {
4742         // Patch is on one instruction only for all of these label reference types.
4743         4
4744     }
4745 
4746     /// Perform the patch.
patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset)4747     fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
4748         let pc_rel = (label_offset as i64) - (use_offset as i64);
4749         debug_assert!(pc_rel <= self.max_pos_range() as i64);
4750         debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
4751         let pc_rel = pc_rel as u32;
4752         let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
4753         let mask = match self {
4754             LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
4755             LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
4756             LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
4757             LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 25..5 inclusive
4758             LabelUse::PCRel32 => 0xffffffff,
4759         };
4760         let pc_rel_shifted = match self {
4761             LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
4762             _ => {
4763                 debug_assert!(pc_rel & 3 == 0);
4764                 pc_rel >> 2
4765             }
4766         };
4767         let pc_rel_inserted = match self {
4768             LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
4769             LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
4770             LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10,
4771             LabelUse::PCRel32 => pc_rel_shifted,
4772         };
4773         let is_add = match self {
4774             LabelUse::PCRel32 => true,
4775             _ => false,
4776         };
4777         let insn_word = if is_add {
4778             insn_word.wrapping_add(pc_rel_inserted)
4779         } else {
4780             (insn_word & !mask) | pc_rel_inserted
4781         };
4782         buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
4783     }
4784 
4785     /// Is a veneer supported for this label reference type?
supports_veneer(self) -> bool4786     fn supports_veneer(self) -> bool {
4787         match self {
4788             LabelUse::Branch19 => true, // veneer is a Branch26
4789             _ => false,
4790         }
4791     }
4792 
4793     /// How large is the veneer, if supported?
veneer_size(self) -> CodeOffset4794     fn veneer_size(self) -> CodeOffset {
4795         4
4796     }
4797 
4798     /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
4799     /// an offset and label-use for the veneer's use of the original label.
generate_veneer( self, buffer: &mut [u8], veneer_offset: CodeOffset, ) -> (CodeOffset, LabelUse)4800     fn generate_veneer(
4801         self,
4802         buffer: &mut [u8],
4803         veneer_offset: CodeOffset,
4804     ) -> (CodeOffset, LabelUse) {
4805         match self {
4806             LabelUse::Branch19 => {
4807                 // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
4808                 // bother with constructing an Inst.
4809                 let insn_word = 0b000101 << 26;
4810                 buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
4811                 (veneer_offset, LabelUse::Branch26)
4812             }
4813             _ => panic!("Unsupported label-reference type for veneer generation!"),
4814         }
4815     }
4816 }
4817