1 //! This module defines aarch64-specific machine instruction types.
2
3 // Some variants are not constructed, but we still want them as options in the future.
4 #![allow(dead_code)]
5
6 use crate::binemit::CodeOffset;
7 use crate::ir::types::{
8 B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64,
9 };
10 use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type, ValueLabel};
11 use crate::isa::unwind::UnwindInst;
12 use crate::isa::CallConv;
13 use crate::machinst::*;
14 use crate::{settings, CodegenError, CodegenResult};
15
16 use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
17 use regalloc::{RegUsageCollector, RegUsageMapper};
18
19 use alloc::boxed::Box;
20 use alloc::vec::Vec;
21 use core::convert::TryFrom;
22 use smallvec::{smallvec, SmallVec};
23 use std::string::{String, ToString};
24
25 pub mod regs;
26 pub use self::regs::*;
27 pub mod imms;
28 pub use self::imms::*;
29 pub mod args;
30 pub use self::args::*;
31 pub mod emit;
32 pub use self::emit::*;
33 use crate::isa::aarch64::abi::AArch64MachineDeps;
34
35 pub mod unwind;
36
37 #[cfg(test)]
38 mod emit_tests;
39
40 //=============================================================================
41 // Instructions (top level): definition
42
/// An ALU operation. This can be paired with several instruction formats
/// below (see `Inst`) in any combination.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum ALUOp {
    /// 32-bit add.
    Add32,
    /// 64-bit add.
    Add64,
    /// 32-bit subtract.
    Sub32,
    /// 64-bit subtract.
    Sub64,
    /// 32-bit bitwise OR (ORR).
    Orr32,
    /// 64-bit bitwise OR (ORR).
    Orr64,
    /// 32-bit OR with inverted second operand (ORN).
    OrrNot32,
    /// 64-bit OR with inverted second operand (ORN).
    OrrNot64,
    /// 32-bit bitwise AND.
    And32,
    /// 64-bit bitwise AND.
    And64,
    /// 32-bit bitwise AND, setting flags (ANDS).
    AndS32,
    /// 64-bit bitwise AND, setting flags (ANDS).
    AndS64,
    /// 32-bit AND with inverted second operand (BIC).
    AndNot32,
    /// 64-bit AND with inverted second operand (BIC).
    AndNot64,
    /// XOR (AArch64 calls this "EOR")
    Eor32,
    /// XOR (AArch64 calls this "EOR")
    Eor64,
    /// XNOR (AArch64 calls this "EOR-NOT")
    EorNot32,
    /// XNOR (AArch64 calls this "EOR-NOT")
    EorNot64,
    /// Add, setting flags
    AddS32,
    /// Add, setting flags
    AddS64,
    /// Sub, setting flags
    SubS32,
    /// Sub, setting flags
    SubS64,
    /// Signed multiply, high-word result
    SMulH,
    /// Unsigned multiply, high-word result
    UMulH,
    /// 64-bit signed divide.
    SDiv64,
    /// 64-bit unsigned divide.
    UDiv64,
    /// 32-bit rotate right.
    RotR32,
    /// 64-bit rotate right.
    RotR64,
    /// 32-bit logical shift right.
    Lsr32,
    /// 64-bit logical shift right.
    Lsr64,
    /// 32-bit arithmetic shift right.
    Asr32,
    /// 64-bit arithmetic shift right.
    Asr64,
    /// 32-bit logical shift left.
    Lsl32,
    /// 64-bit logical shift left.
    Lsl64,
    /// Add with carry
    Adc32,
    /// Add with carry
    Adc64,
    /// Add with carry, setting flags
    AdcS32,
    /// Add with carry, setting flags
    AdcS64,
    /// Subtract with carry
    Sbc32,
    /// Subtract with carry
    Sbc64,
    /// Subtract with carry, setting flags
    SbcS32,
    /// Subtract with carry, setting flags
    SbcS64,
}
104
/// An ALU operation with three arguments.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum ALUOp3 {
    /// Multiply-add, 32-bit: `rd = rn * rm + ra`.
    MAdd32,
    /// Multiply-add, 64-bit: `rd = rn * rm + ra`.
    MAdd64,
    /// Multiply-sub, 32-bit: `rd = ra - rn * rm`.
    MSub32,
    /// Multiply-sub, 64-bit: `rd = ra - rn * rm`.
    MSub64,
}
117
/// A floating-point unit (FPU) operation with one arg.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FPUOp1 {
    /// Absolute value, 32-bit.
    Abs32,
    /// Absolute value, 64-bit.
    Abs64,
    /// Negate, 32-bit.
    Neg32,
    /// Negate, 64-bit.
    Neg64,
    /// Square root, 32-bit.
    Sqrt32,
    /// Square root, 64-bit.
    Sqrt64,
    /// Convert a 32-bit float to a 64-bit float.
    Cvt32To64,
    /// Convert a 64-bit float to a 32-bit float.
    Cvt64To32,
}
130
/// A floating-point unit (FPU) operation with two args.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FPUOp2 {
    /// Add, 32-bit.
    Add32,
    /// Add, 64-bit.
    Add64,
    /// Subtract, 32-bit.
    Sub32,
    /// Subtract, 64-bit.
    Sub64,
    /// Multiply, 32-bit.
    Mul32,
    /// Multiply, 64-bit.
    Mul64,
    /// Divide, 32-bit.
    Div32,
    /// Divide, 64-bit.
    Div64,
    /// Maximum, 32-bit.
    Max32,
    /// Maximum, 64-bit.
    Max64,
    /// Minimum, 32-bit.
    Min32,
    /// Minimum, 64-bit.
    Min64,
    /// Signed saturating add
    Sqadd64,
    /// Unsigned saturating add
    Uqadd64,
    /// Signed saturating subtract
    Sqsub64,
    /// Unsigned saturating subtract
    Uqsub64,
}
155
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
    /// Shift left and insert. Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// Shift left and insert. Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}
168
/// A floating-point unit (FPU) operation with three args.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FPUOp3 {
    /// Fused multiply-add, 32-bit.
    MAdd32,
    /// Fused multiply-add, 64-bit.
    MAdd64,
}
175
/// A conversion from an FP to an integer value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FpuToIntOp {
    /// 32-bit float to unsigned 32-bit integer.
    F32ToU32,
    /// 32-bit float to signed 32-bit integer.
    F32ToI32,
    /// 32-bit float to unsigned 64-bit integer.
    F32ToU64,
    /// 32-bit float to signed 64-bit integer.
    F32ToI64,
    /// 64-bit float to unsigned 32-bit integer.
    F64ToU32,
    /// 64-bit float to signed 32-bit integer.
    F64ToI32,
    /// 64-bit float to unsigned 64-bit integer.
    F64ToU64,
    /// 64-bit float to signed 64-bit integer.
    F64ToI64,
}
188
/// A conversion from an integer to an FP value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum IntToFpuOp {
    /// Unsigned 32-bit integer to 32-bit float.
    U32ToF32,
    /// Signed 32-bit integer to 32-bit float.
    I32ToF32,
    /// Unsigned 32-bit integer to 64-bit float.
    U32ToF64,
    /// Signed 32-bit integer to 64-bit float.
    I32ToF64,
    /// Unsigned 64-bit integer to 32-bit float.
    U64ToF32,
    /// Signed 64-bit integer to 32-bit float.
    I64ToF32,
    /// Unsigned 64-bit integer to 64-bit float.
    U64ToF64,
    /// Signed 64-bit integer to 64-bit float.
    I64ToF64,
}
201
/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to
/// nearest, and for 32- or 64-bit FP values.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FpuRoundMode {
    /// Round toward minus infinity (floor), 32-bit.
    Minus32,
    /// Round toward minus infinity (floor), 64-bit.
    Minus64,
    /// Round toward plus infinity (ceil), 32-bit.
    Plus32,
    /// Round toward plus infinity (ceil), 64-bit.
    Plus64,
    /// Round toward zero (trunc), 32-bit.
    Zero32,
    /// Round toward zero (trunc), 64-bit.
    Zero64,
    /// Round to nearest, 32-bit.
    Nearest32,
    /// Round to nearest, 64-bit.
    Nearest64,
}
215
/// Type of vector element extensions.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecExtendOp {
    /// Signed extension of 8-bit elements (SXTL).
    Sxtl8,
    /// Signed extension of 16-bit elements (SXTL).
    Sxtl16,
    /// Signed extension of 32-bit elements (SXTL).
    Sxtl32,
    /// Unsigned extension of 8-bit elements (UXTL).
    Uxtl8,
    /// Unsigned extension of 16-bit elements (UXTL).
    Uxtl16,
    /// Unsigned extension of 32-bit elements (UXTL).
    Uxtl32,
}
232
/// A vector ALU operation.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecALUOp {
    /// Signed saturating add
    Sqadd,
    /// Unsigned saturating add
    Uqadd,
    /// Signed saturating subtract
    Sqsub,
    /// Unsigned saturating subtract
    Uqsub,
    /// Compare bitwise equal
    Cmeq,
    /// Compare signed greater than or equal
    Cmge,
    /// Compare signed greater than
    Cmgt,
    /// Compare unsigned higher or same ("HS" suffix = higher-or-same)
    Cmhs,
    /// Compare unsigned higher ("HI" suffix = strictly higher)
    Cmhi,
    /// Floating-point compare equal
    Fcmeq,
    /// Floating-point compare greater than
    Fcmgt,
    /// Floating-point compare greater than or equal
    Fcmge,
    /// Bitwise and
    And,
    /// Bitwise bit clear
    Bic,
    /// Bitwise inclusive or
    Orr,
    /// Bitwise exclusive or
    Eor,
    /// Bitwise select
    Bsl,
    /// Unsigned maximum pairwise
    Umaxp,
    /// Add
    Add,
    /// Subtract
    Sub,
    /// Multiply
    Mul,
    /// Signed shift left
    Sshl,
    /// Unsigned shift left
    Ushl,
    /// Unsigned minimum
    Umin,
    /// Signed minimum
    Smin,
    /// Unsigned maximum
    Umax,
    /// Signed maximum
    Smax,
    /// Unsigned rounding halving add
    Urhadd,
    /// Floating-point add
    Fadd,
    /// Floating-point subtract
    Fsub,
    /// Floating-point divide
    Fdiv,
    /// Floating-point maximum
    Fmax,
    /// Floating-point minimum
    Fmin,
    /// Floating-point multiply
    Fmul,
    /// Add pairwise
    Addp,
    /// Zip vectors (primary) [meaning, low halves]
    Zip1,
    /// Signed saturating rounding doubling multiply returning high half
    Sqrdmulh,
}
311
/// A Vector miscellaneous operation with two registers.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecMisc2 {
    /// Bitwise NOT
    Not,
    /// Negate
    Neg,
    /// Absolute value
    Abs,
    /// Floating-point absolute value
    Fabs,
    /// Floating-point negate
    Fneg,
    /// Floating-point square root
    Fsqrt,
    /// Reverse elements in 64-bit doublewords
    Rev64,
    /// Floating-point convert to signed integer, rounding toward zero
    Fcvtzs,
    /// Floating-point convert to unsigned integer, rounding toward zero
    Fcvtzu,
    /// Signed integer convert to floating-point
    Scvtf,
    /// Unsigned integer convert to floating-point
    Ucvtf,
    /// Floating point round to integral, rounding towards nearest
    Frintn,
    /// Floating point round to integral, rounding towards zero
    Frintz,
    /// Floating point round to integral, rounding towards minus infinity
    Frintm,
    /// Floating point round to integral, rounding towards plus infinity
    Frintp,
    /// Population count per byte (CNT)
    Cnt,
    /// Compare bitwise equal to 0 (CMEQ with zero)
    Cmeq0,
}
350
/// A vector widening operation with one argument.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRLongOp {
    /// Floating-point convert to higher precision long, 16-bit elements (FCVTL)
    Fcvtl16,
    /// Floating-point convert to higher precision long, 32-bit elements (FCVTL)
    Fcvtl32,
    /// Shift left long (by element size), 8-bit elements (SHLL)
    Shll8,
    /// Shift left long (by element size), 16-bit elements (SHLL)
    Shll16,
    /// Shift left long (by element size), 32-bit elements (SHLL)
    Shll32,
}
365
/// A vector narrowing operation with one argument.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRNarrowOp {
    /// Extract narrow, 16-bit elements (XTN)
    Xtn16,
    /// Extract narrow, 32-bit elements (XTN)
    Xtn32,
    /// Extract narrow, 64-bit elements (XTN)
    Xtn64,
    /// Signed saturating extract narrow, 16-bit elements (SQXTN)
    Sqxtn16,
    /// Signed saturating extract narrow, 32-bit elements (SQXTN)
    Sqxtn32,
    /// Signed saturating extract narrow, 64-bit elements (SQXTN)
    Sqxtn64,
    /// Signed saturating extract unsigned narrow, 16-bit elements (SQXTUN)
    Sqxtun16,
    /// Signed saturating extract unsigned narrow, 32-bit elements (SQXTUN)
    Sqxtun32,
    /// Signed saturating extract unsigned narrow, 64-bit elements (SQXTUN)
    Sqxtun64,
    /// Unsigned saturating extract narrow, 16-bit elements (UQXTN)
    Uqxtn16,
    /// Unsigned saturating extract narrow, 32-bit elements (UQXTN)
    Uqxtn32,
    /// Unsigned saturating extract narrow, 64-bit elements (UQXTN)
    Uqxtn64,
    /// Floating-point convert to lower precision narrow, 32-bit elements (FCVTN)
    Fcvtn32,
    /// Floating-point convert to lower precision narrow, 64-bit elements (FCVTN)
    Fcvtn64,
}
398
/// A vector widening operation with two register arguments.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRRLongOp {
    /// Signed multiply long, 8-bit elements.
    Smull8,
    /// Signed multiply long, 16-bit elements.
    Smull16,
    /// Signed multiply long, 32-bit elements.
    Smull32,
    /// Unsigned multiply long, 8-bit elements.
    Umull8,
    /// Unsigned multiply long, 16-bit elements.
    Umull16,
    /// Unsigned multiply long, 32-bit elements.
    Umull32,
    /// Unsigned multiply add long, 8-bit elements.
    Umlal8,
    /// Unsigned multiply add long, 16-bit elements.
    Umlal16,
    /// Unsigned multiply add long, 32-bit elements.
    Umlal32,
}
414
/// A vector operation on a pair of elements with one register.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecPairOp {
    /// Add pair of elements (ADDP)
    Addp,
}
421
/// 1-operand vector instruction that extends elements of the input register
/// and operates on a pair of elements.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRPairLongOp {
    /// Sign extend and add pair of 8-bit elements (SADDLP)
    Saddlp8,
    /// Sign extend and add pair of 16-bit elements (SADDLP)
    Saddlp16,
    /// Unsigned extend and add pair of 8-bit elements (UADDLP)
    Uaddlp8,
    /// Unsigned extend and add pair of 16-bit elements (UADDLP)
    Uaddlp16,
}
433
/// An operation across the lanes of vectors.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecLanesOp {
    /// Integer addition across a vector (ADDV)
    Addv,
    /// Unsigned minimum across a vector (UMINV)
    Uminv,
}
442
/// A shift-by-immediate operation on each lane of a vector.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecShiftImmOp {
    /// Shift left (SHL; no signed/unsigned distinction for left shifts)
    Shl,
    /// Unsigned shift right (USHR)
    Ushr,
    /// Signed shift right (SSHR)
    Sshr,
}
453
/// An operation on the bits of a register. This can be paired with several instruction formats
/// below (see `Inst`) in any combination.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum BitOp {
    /// Bit reverse
    RBit32,
    /// Bit reverse
    RBit64,
    /// Count leading zeros, 32-bit.
    Clz32,
    /// Count leading zeros, 64-bit.
    Clz64,
    /// Count leading sign bits, 32-bit.
    Cls32,
    /// Count leading sign bits, 64-bit.
    Cls64,
}
467
468 impl BitOp {
469 /// What is the opcode's native width?
operand_size(&self) -> OperandSize470 pub fn operand_size(&self) -> OperandSize {
471 match self {
472 BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32,
473 _ => OperandSize::Size64,
474 }
475 }
476
477 /// Get the assembly mnemonic for this opcode.
op_str(&self) -> &'static str478 pub fn op_str(&self) -> &'static str {
479 match self {
480 BitOp::RBit32 | BitOp::RBit64 => "rbit",
481 BitOp::Clz32 | BitOp::Clz64 => "clz",
482 BitOp::Cls32 | BitOp::Cls64 => "cls",
483 }
484 }
485 }
486
487 impl From<(Opcode, Type)> for BitOp {
488 /// Get the BitOp from the IR opcode.
from(op_ty: (Opcode, Type)) -> BitOp489 fn from(op_ty: (Opcode, Type)) -> BitOp {
490 match op_ty {
491 (Opcode::Bitrev, I32) => BitOp::RBit32,
492 (Opcode::Bitrev, I64) => BitOp::RBit64,
493 (Opcode::Clz, I32) => BitOp::Clz32,
494 (Opcode::Clz, I64) => BitOp::Clz64,
495 (Opcode::Cls, I32) => BitOp::Cls32,
496 (Opcode::Cls, I64) => BitOp::Cls64,
497 _ => unreachable!("Called with non-bit op!: {:?}", op_ty),
498 }
499 }
500 }
501
/// Additional information for (direct) Call instructions, left out of line to lower the size of
/// the Inst enum.
#[derive(Clone, Debug)]
pub struct CallInfo {
    /// The name (symbol) of the call target.
    pub dest: ExternalName,
    /// Registers read by the call.
    pub uses: Vec<Reg>,
    /// Registers written (defined/clobbered) by the call.
    pub defs: Vec<Writable<Reg>>,
    /// The IR opcode this call was lowered from.
    pub opcode: Opcode,
    /// Calling convention of the caller.
    pub caller_callconv: CallConv,
    /// Calling convention of the callee.
    pub callee_callconv: CallConv,
}
513
/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
/// enum.
#[derive(Clone, Debug)]
pub struct CallIndInfo {
    /// Register holding the call target address.
    pub rn: Reg,
    /// Registers read by the call.
    pub uses: Vec<Reg>,
    /// Registers written (defined/clobbered) by the call.
    pub defs: Vec<Writable<Reg>>,
    /// The IR opcode this call was lowered from.
    pub opcode: Opcode,
    /// Calling convention of the caller.
    pub caller_callconv: CallConv,
    /// Calling convention of the callee.
    pub callee_callconv: CallConv,
}
525
/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
/// enum.
#[derive(Clone, Debug)]
pub struct JTSequenceInfo {
    /// Branch target for each jump-table entry.
    pub targets: Vec<BranchTarget>,
    /// Default branch target (presumably taken when the index is out of range;
    /// confirm against the emit code).
    pub default_target: BranchTarget,
    pub targets_for_term: Vec<MachLabel>, // needed for MachTerminator.
}
534
535 /// Instruction formats.
536 #[derive(Clone, Debug)]
537 pub enum Inst {
538 /// A no-op of zero size.
539 Nop0,
540
541 /// A no-op that is one instruction large.
542 Nop4,
543
544 /// An ALU operation with two register sources and a register destination.
545 AluRRR {
546 alu_op: ALUOp,
547 rd: Writable<Reg>,
548 rn: Reg,
549 rm: Reg,
550 },
551 /// An ALU operation with three register sources and a register destination.
552 AluRRRR {
553 alu_op: ALUOp3,
554 rd: Writable<Reg>,
555 rn: Reg,
556 rm: Reg,
557 ra: Reg,
558 },
559 /// An ALU operation with a register source and an immediate-12 source, and a register
560 /// destination.
561 AluRRImm12 {
562 alu_op: ALUOp,
563 rd: Writable<Reg>,
564 rn: Reg,
565 imm12: Imm12,
566 },
567 /// An ALU operation with a register source and an immediate-logic source, and a register destination.
568 AluRRImmLogic {
569 alu_op: ALUOp,
570 rd: Writable<Reg>,
571 rn: Reg,
572 imml: ImmLogic,
573 },
574 /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination.
575 AluRRImmShift {
576 alu_op: ALUOp,
577 rd: Writable<Reg>,
578 rn: Reg,
579 immshift: ImmShift,
580 },
581 /// An ALU operation with two register sources, one of which can be shifted, and a register
582 /// destination.
583 AluRRRShift {
584 alu_op: ALUOp,
585 rd: Writable<Reg>,
586 rn: Reg,
587 rm: Reg,
588 shiftop: ShiftOpAndAmt,
589 },
590 /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and
591 /// shifted, and a register destination.
592 AluRRRExtend {
593 alu_op: ALUOp,
594 rd: Writable<Reg>,
595 rn: Reg,
596 rm: Reg,
597 extendop: ExtendOp,
598 },
599
600 /// A bit op instruction with a single register source.
601 BitRR {
602 op: BitOp,
603 rd: Writable<Reg>,
604 rn: Reg,
605 },
606
607 /// An unsigned (zero-extending) 8-bit load.
608 ULoad8 {
609 rd: Writable<Reg>,
610 mem: AMode,
611 flags: MemFlags,
612 },
613 /// A signed (sign-extending) 8-bit load.
614 SLoad8 {
615 rd: Writable<Reg>,
616 mem: AMode,
617 flags: MemFlags,
618 },
619 /// An unsigned (zero-extending) 16-bit load.
620 ULoad16 {
621 rd: Writable<Reg>,
622 mem: AMode,
623 flags: MemFlags,
624 },
625 /// A signed (sign-extending) 16-bit load.
626 SLoad16 {
627 rd: Writable<Reg>,
628 mem: AMode,
629 flags: MemFlags,
630 },
631 /// An unsigned (zero-extending) 32-bit load.
632 ULoad32 {
633 rd: Writable<Reg>,
634 mem: AMode,
635 flags: MemFlags,
636 },
637 /// A signed (sign-extending) 32-bit load.
638 SLoad32 {
639 rd: Writable<Reg>,
640 mem: AMode,
641 flags: MemFlags,
642 },
643 /// A 64-bit load.
644 ULoad64 {
645 rd: Writable<Reg>,
646 mem: AMode,
647 flags: MemFlags,
648 },
649
650 /// An 8-bit store.
651 Store8 {
652 rd: Reg,
653 mem: AMode,
654 flags: MemFlags,
655 },
656 /// A 16-bit store.
657 Store16 {
658 rd: Reg,
659 mem: AMode,
660 flags: MemFlags,
661 },
662 /// A 32-bit store.
663 Store32 {
664 rd: Reg,
665 mem: AMode,
666 flags: MemFlags,
667 },
668 /// A 64-bit store.
669 Store64 {
670 rd: Reg,
671 mem: AMode,
672 flags: MemFlags,
673 },
674
675 /// A store of a pair of registers.
676 StoreP64 {
677 rt: Reg,
678 rt2: Reg,
679 mem: PairAMode,
680 flags: MemFlags,
681 },
682 /// A load of a pair of registers.
683 LoadP64 {
684 rt: Writable<Reg>,
685 rt2: Writable<Reg>,
686 mem: PairAMode,
687 flags: MemFlags,
688 },
689
690 /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we
691 /// keep them separate at the `Inst` level for better pretty-printing
692 /// and faster `is_move()` logic.
693 Mov64 {
694 rd: Writable<Reg>,
695 rm: Reg,
696 },
697
698 /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is
699 /// effectively an alias for an unsigned 32-to-64-bit extension.
700 Mov32 {
701 rd: Writable<Reg>,
702 rm: Reg,
703 },
704
705 /// A MOVZ with a 16-bit immediate.
706 MovZ {
707 rd: Writable<Reg>,
708 imm: MoveWideConst,
709 size: OperandSize,
710 },
711
712 /// A MOVN with a 16-bit immediate.
713 MovN {
714 rd: Writable<Reg>,
715 imm: MoveWideConst,
716 size: OperandSize,
717 },
718
719 /// A MOVK with a 16-bit immediate.
720 MovK {
721 rd: Writable<Reg>,
722 imm: MoveWideConst,
723 size: OperandSize,
724 },
725
726 /// A sign- or zero-extend operation.
727 Extend {
728 rd: Writable<Reg>,
729 rn: Reg,
730 signed: bool,
731 from_bits: u8,
732 to_bits: u8,
733 },
734
735 /// A conditional-select operation.
736 CSel {
737 rd: Writable<Reg>,
738 cond: Cond,
739 rn: Reg,
740 rm: Reg,
741 },
742
743 /// A conditional-set operation.
744 CSet {
745 rd: Writable<Reg>,
746 cond: Cond,
747 },
748
749 /// A conditional-set-mask operation.
750 CSetm {
751 rd: Writable<Reg>,
752 cond: Cond,
753 },
754
755 /// A conditional comparison with an immediate.
756 CCmpImm {
757 size: OperandSize,
758 rn: Reg,
759 imm: UImm5,
760 nzcv: NZCV,
761 cond: Cond,
762 },
763
764 /// A synthetic insn, which is a load-linked store-conditional loop, that has the overall
765 /// effect of atomically modifying a memory location in a particular way. Because we have
766 /// no way to explain to the regalloc about earlyclobber registers, this instruction has
767 /// completely fixed operand registers, and we rely on the RA's coalescing to remove copies
768 /// in the surrounding code to the extent it can. The sequence is both preceded and
769 /// followed by a fence which is at least as comprehensive as that of the `Fence`
770 /// instruction below. This instruction is sequentially consistent. The operand
771 /// conventions are:
772 ///
773 /// x25 (rd) address
774 /// x26 (rd) second operand for `op`
775 /// x27 (wr) old value
776 /// x24 (wr) scratch reg; value afterwards has no meaning
777 /// x28 (wr) scratch reg; value afterwards has no meaning
778 AtomicRMW {
779 ty: Type, // I8, I16, I32 or I64
780 op: inst_common::AtomicRmwOp,
781 },
782
783 /// An atomic compare-and-swap operation. This instruction is sequentially consistent.
784 AtomicCAS {
785 rs: Writable<Reg>,
786 rt: Reg,
787 rn: Reg,
788 ty: Type,
789 },
790
791 /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
792 /// store-conditional loop.
793 /// This instruction is sequentially consistent.
794 /// Note that the operand conventions, although very similar to AtomicRMW, are different:
795 ///
796 /// x25 (rd) address
797 /// x26 (rd) expected value
798 /// x28 (rd) replacement value
799 /// x27 (wr) old value
800 /// x24 (wr) scratch reg; value afterwards has no meaning
801 AtomicCASLoop {
802 ty: Type, // I8, I16, I32 or I64
803 },
804
805 /// Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put
806 /// it in `rn`, optionally zero-extending to fill a word or double word result.
807 /// This instruction is sequentially consistent.
808 LoadAcquire {
809 access_ty: Type, // I8, I16, I32 or I64
810 rt: Writable<Reg>,
811 rn: Reg,
812 },
813
814 /// Write the lowest `ty` bits of `rt` to address `rn`.
815 /// This instruction is sequentially consistent.
816 StoreRelease {
817 access_ty: Type, // I8, I16, I32 or I64
818 rt: Reg,
819 rn: Reg,
820 },
821
822 /// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
823 /// nor stores may move forwards or backwards across the fence. Currently emitted as "dmb
824 /// ish". This instruction is sequentially consistent.
825 Fence,
826
827 /// FPU move. Note that this is distinct from a vector-register
828 /// move; moving just 64 bits seems to be significantly faster.
829 FpuMove64 {
830 rd: Writable<Reg>,
831 rn: Reg,
832 },
833
834 /// Vector register move.
835 FpuMove128 {
836 rd: Writable<Reg>,
837 rn: Reg,
838 },
839
840 /// Move to scalar from a vector element.
841 FpuMoveFromVec {
842 rd: Writable<Reg>,
843 rn: Reg,
844 idx: u8,
845 size: VectorSize,
846 },
847
848 /// Zero-extend a SIMD & FP scalar to the full width of a vector register.
849 FpuExtend {
850 rd: Writable<Reg>,
851 rn: Reg,
852 size: ScalarSize,
853 },
854
855 /// 1-op FPU instruction.
856 FpuRR {
857 fpu_op: FPUOp1,
858 rd: Writable<Reg>,
859 rn: Reg,
860 },
861
862 /// 2-op FPU instruction.
863 FpuRRR {
864 fpu_op: FPUOp2,
865 rd: Writable<Reg>,
866 rn: Reg,
867 rm: Reg,
868 },
869
870 FpuRRI {
871 fpu_op: FPUOpRI,
872 rd: Writable<Reg>,
873 rn: Reg,
874 },
875
876 /// 3-op FPU instruction.
877 FpuRRRR {
878 fpu_op: FPUOp3,
879 rd: Writable<Reg>,
880 rn: Reg,
881 rm: Reg,
882 ra: Reg,
883 },
884
885 /// FPU comparison, single-precision (32 bit).
886 FpuCmp32 {
887 rn: Reg,
888 rm: Reg,
889 },
890
891 /// FPU comparison, double-precision (64 bit).
892 FpuCmp64 {
893 rn: Reg,
894 rm: Reg,
895 },
896
897 /// Floating-point load, single-precision (32 bit).
898 FpuLoad32 {
899 rd: Writable<Reg>,
900 mem: AMode,
901 flags: MemFlags,
902 },
903 /// Floating-point store, single-precision (32 bit).
904 FpuStore32 {
905 rd: Reg,
906 mem: AMode,
907 flags: MemFlags,
908 },
909 /// Floating-point load, double-precision (64 bit).
910 FpuLoad64 {
911 rd: Writable<Reg>,
912 mem: AMode,
913 flags: MemFlags,
914 },
915 /// Floating-point store, double-precision (64 bit).
916 FpuStore64 {
917 rd: Reg,
918 mem: AMode,
919 flags: MemFlags,
920 },
921 /// Floating-point/vector load, 128 bit.
922 FpuLoad128 {
923 rd: Writable<Reg>,
924 mem: AMode,
925 flags: MemFlags,
926 },
927 /// Floating-point/vector store, 128 bit.
928 FpuStore128 {
929 rd: Reg,
930 mem: AMode,
931 flags: MemFlags,
932 },
933 /// A load of a pair of floating-point registers, double precision (64-bit).
934 FpuLoadP64 {
935 rt: Writable<Reg>,
936 rt2: Writable<Reg>,
937 mem: PairAMode,
938 flags: MemFlags,
939 },
940 /// A store of a pair of floating-point registers, double precision (64-bit).
941 FpuStoreP64 {
942 rt: Reg,
943 rt2: Reg,
944 mem: PairAMode,
945 flags: MemFlags,
946 },
947 /// A load of a pair of floating-point registers, 128-bit.
948 FpuLoadP128 {
949 rt: Writable<Reg>,
950 rt2: Writable<Reg>,
951 mem: PairAMode,
952 flags: MemFlags,
953 },
954 /// A store of a pair of floating-point registers, 128-bit.
955 FpuStoreP128 {
956 rt: Reg,
957 rt2: Reg,
958 mem: PairAMode,
959 flags: MemFlags,
960 },
961 LoadFpuConst64 {
962 rd: Writable<Reg>,
963 const_data: u64,
964 },
965
966 LoadFpuConst128 {
967 rd: Writable<Reg>,
968 const_data: u128,
969 },
970
971 /// Conversion: FP -> integer.
972 FpuToInt {
973 op: FpuToIntOp,
974 rd: Writable<Reg>,
975 rn: Reg,
976 },
977
978 /// Conversion: integer -> FP.
979 IntToFpu {
980 op: IntToFpuOp,
981 rd: Writable<Reg>,
982 rn: Reg,
983 },
984
985 /// FP conditional select, 32 bit.
986 FpuCSel32 {
987 rd: Writable<Reg>,
988 rn: Reg,
989 rm: Reg,
990 cond: Cond,
991 },
992 /// FP conditional select, 64 bit.
993 FpuCSel64 {
994 rd: Writable<Reg>,
995 rn: Reg,
996 rm: Reg,
997 cond: Cond,
998 },
999
1000 /// Round to integer.
1001 FpuRound {
1002 op: FpuRoundMode,
1003 rd: Writable<Reg>,
1004 rn: Reg,
1005 },
1006
1007 /// Move from a GPR to a vector register. The scalar value is parked in the lowest lane
1008 /// of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit
1009 /// transactions are supported.
1010 MovToFpu {
1011 rd: Writable<Reg>,
1012 rn: Reg,
1013 size: ScalarSize,
1014 },
1015
1016 /// Move to a vector element from a GPR.
1017 MovToVec {
1018 rd: Writable<Reg>,
1019 rn: Reg,
1020 idx: u8,
1021 size: VectorSize,
1022 },
1023
1024 /// Unsigned move from a vector element to a GPR.
1025 MovFromVec {
1026 rd: Writable<Reg>,
1027 rn: Reg,
1028 idx: u8,
1029 size: VectorSize,
1030 },
1031
1032 /// Signed move from a vector element to a GPR.
1033 MovFromVecSigned {
1034 rd: Writable<Reg>,
1035 rn: Reg,
1036 idx: u8,
1037 size: VectorSize,
1038 scalar_size: OperandSize,
1039 },
1040
1041 /// Duplicate general-purpose register to vector.
1042 VecDup {
1043 rd: Writable<Reg>,
1044 rn: Reg,
1045 size: VectorSize,
1046 },
1047
1048 /// Duplicate scalar to vector.
1049 VecDupFromFpu {
1050 rd: Writable<Reg>,
1051 rn: Reg,
1052 size: VectorSize,
1053 },
1054
1055 /// Duplicate FP immediate to vector.
1056 VecDupFPImm {
1057 rd: Writable<Reg>,
1058 imm: ASIMDFPModImm,
1059 size: VectorSize,
1060 },
1061
1062 /// Duplicate immediate to vector.
1063 VecDupImm {
1064 rd: Writable<Reg>,
1065 imm: ASIMDMovModImm,
1066 invert: bool,
1067 size: VectorSize,
1068 },
1069
1070 /// Vector extend.
1071 VecExtend {
1072 t: VecExtendOp,
1073 rd: Writable<Reg>,
1074 rn: Reg,
1075 high_half: bool,
1076 },
1077
1078 /// Move vector element to another vector element.
1079 VecMovElement {
1080 rd: Writable<Reg>,
1081 rn: Reg,
1082 dest_idx: u8,
1083 src_idx: u8,
1084 size: VectorSize,
1085 },
1086
1087 /// Vector widening operation.
1088 VecRRLong {
1089 op: VecRRLongOp,
1090 rd: Writable<Reg>,
1091 rn: Reg,
1092 high_half: bool,
1093 },
1094
1095 /// Vector narrowing operation.
1096 VecRRNarrow {
1097 op: VecRRNarrowOp,
1098 rd: Writable<Reg>,
1099 rn: Reg,
1100 high_half: bool,
1101 },
1102
1103 /// 1-operand vector instruction that operates on a pair of elements.
1104 VecRRPair {
1105 op: VecPairOp,
1106 rd: Writable<Reg>,
1107 rn: Reg,
1108 },
1109
1110 /// 2-operand vector instruction that produces a result with twice the
1111 /// lane width and half the number of lanes.
1112 VecRRRLong {
1113 alu_op: VecRRRLongOp,
1114 rd: Writable<Reg>,
1115 rn: Reg,
1116 rm: Reg,
1117 high_half: bool,
1118 },
1119
1120 /// 1-operand vector instruction that extends elements of the input
1121 /// register and operates on a pair of elements. The output lane width
1122 /// is double that of the input.
1123 VecRRPairLong {
1124 op: VecRRPairLongOp,
1125 rd: Writable<Reg>,
1126 rn: Reg,
1127 },
1128
1129 /// A vector ALU op.
1130 VecRRR {
1131 alu_op: VecALUOp,
1132 rd: Writable<Reg>,
1133 rn: Reg,
1134 rm: Reg,
1135 size: VectorSize,
1136 },
1137
1138 /// Vector two register miscellaneous instruction.
1139 VecMisc {
1140 op: VecMisc2,
1141 rd: Writable<Reg>,
1142 rn: Reg,
1143 size: VectorSize,
1144 },
1145
1146 /// Vector instruction across lanes.
1147 VecLanes {
1148 op: VecLanesOp,
1149 rd: Writable<Reg>,
1150 rn: Reg,
1151 size: VectorSize,
1152 },
1153
1154 /// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate),
1155 /// Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts,
1156 /// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero
1157 /// right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm`
1158 /// values from 0 to lane-size-in-bits - 1 inclusive.
1159 VecShiftImm {
1160 op: VecShiftImmOp,
1161 rd: Writable<Reg>,
1162 rn: Reg,
1163 size: VectorSize,
1164 imm: u8,
1165 },
1166
1167 /// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
1168 /// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
1169 VecExtract {
1170 rd: Writable<Reg>,
1171 rn: Reg,
1172 rm: Reg,
1173 imm4: u8,
1174 },
1175
1176 /// Table vector lookup - single register table. The table consists of 8-bit elements and is
1177 /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
1178 /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
1179 /// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
1180 /// to 0.
1181 VecTbl {
1182 rd: Writable<Reg>,
1183 rn: Reg,
1184 rm: Reg,
1185 is_extension: bool,
1186 },
1187
1188 /// Table vector lookup - two register table. The table consists of 8-bit elements and is
1189 /// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
1190 /// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
1191 /// the destination vector that correspond to out-of-range indices (greater than 31) unmodified
1192 /// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
1193 /// modulo 32, that is v31 and v0 (in that order) are consecutive registers.
1194 VecTbl2 {
1195 rd: Writable<Reg>,
1196 rn: Reg,
1197 rn2: Reg,
1198 rm: Reg,
1199 is_extension: bool,
1200 },
1201
1202 /// Load an element and replicate to all lanes of a vector.
1203 VecLoadReplicate {
1204 rd: Writable<Reg>,
1205 rn: Reg,
1206 size: VectorSize,
1207 },
1208
1209 /// Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn
1210 /// control-flow diamond.
1211 VecCSel {
1212 rd: Writable<Reg>,
1213 rn: Reg,
1214 rm: Reg,
1215 cond: Cond,
1216 },
1217
1218 /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
1219 MovToNZCV {
1220 rn: Reg,
1221 },
1222
1223 /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn).
1224 MovFromNZCV {
1225 rd: Writable<Reg>,
1226 },
1227
1228 /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation
1229 /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the
1230 /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
1231 /// target.
1232 Call {
1233 info: Box<CallInfo>,
1234 },
1235 /// A machine indirect-call instruction.
1236 CallInd {
1237 info: Box<CallIndInfo>,
1238 },
1239
1240 // ---- branches (exactly one must appear at end of BB) ----
1241 /// A machine return instruction.
1242 Ret,
1243
1244 /// A placeholder instruction, generating no code, meaning that a function epilogue must be
1245 /// inserted there.
1246 EpiloguePlaceholder,
1247
1248 /// An unconditional branch.
1249 Jump {
1250 dest: BranchTarget,
1251 },
1252
1253 /// A conditional branch. Contains two targets; at emission time, both are emitted, but
1254 /// the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the
1255 /// choice of taken/not_taken (inverting the branch polarity as needed) based on the
1256 /// fallthrough at the time of lowering.
1257 CondBr {
1258 taken: BranchTarget,
1259 not_taken: BranchTarget,
1260 kind: CondBrKind,
1261 },
1262
1263 /// A conditional trap: execute a `udf` if the condition is true. This is
1264 /// one VCode instruction because it uses embedded control flow; it is
1265 /// logically a single-in, single-out region, but needs to appear as one
1266 /// unit to the register allocator.
1267 ///
1268 /// The `CondBrKind` gives the conditional-branch condition that will
1269 /// *execute* the embedded `Inst`. (In the emitted code, we use the inverse
1270 /// of this condition in a branch that skips the trap instruction.)
1271 TrapIf {
1272 kind: CondBrKind,
1273 trap_code: TrapCode,
1274 },
1275
1276 /// An indirect branch through a register, augmented with set of all
1277 /// possible successors.
1278 IndirectBr {
1279 rn: Reg,
1280 targets: Vec<MachLabel>,
1281 },
1282
1283 /// A "break" instruction, used for e.g. traps and debug breakpoints.
1284 Brk,
1285
1286 /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
1287 /// runtime.
1288 Udf {
1289 trap_code: TrapCode,
1290 },
1291
1292 /// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
1293 /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
1294 /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
1295 /// need full `MemLabel` support.
1296 Adr {
1297 rd: Writable<Reg>,
1298 /// Offset in range -2^20 .. 2^20.
1299 off: i32,
1300 },
1301
1302 /// Raw 32-bit word, used for inline constants and jump-table entries.
1303 Word4 {
1304 data: u32,
1305 },
1306
1307 /// Raw 64-bit word, used for inline constants.
1308 Word8 {
1309 data: u64,
1310 },
1311
1312 /// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale).
1313 JTSequence {
1314 info: Box<JTSequenceInfo>,
1315 ridx: Reg,
1316 rtmp1: Writable<Reg>,
1317 rtmp2: Writable<Reg>,
1318 },
1319
1320 /// Load an inline symbol reference.
1321 LoadExtName {
1322 rd: Writable<Reg>,
1323 name: Box<ExternalName>,
1324 offset: i64,
1325 },
1326
1327 /// Load address referenced by `mem` into `rd`.
1328 LoadAddr {
1329 rd: Writable<Reg>,
1330 mem: AMode,
1331 },
1332
1333 /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
1334 /// controls how AMode::NominalSPOffset args are lowered.
1335 VirtualSPOffsetAdj {
1336 offset: i64,
1337 },
1338
1339 /// Meta-insn, no-op in generated code: emit constant/branch veneer island
1340 /// at this point (with a guard jump around it) if less than the needed
1341 /// space is available before the next branch deadline. See the `MachBuffer`
1342 /// implementation in `machinst/buffer.rs` for the overall algorithm. In
1343 /// brief, we retain a set of "pending/unresolved label references" from
1344 /// branches as we scan forward through instructions to emit machine code;
1345 /// if we notice we're about to go out of range on an unresolved reference,
1346 /// we stop, emit a bunch of "veneers" (branches in a form that has a longer
1347 /// range, e.g. a 26-bit-offset unconditional jump), and point the original
1348 /// label references to those. This is an "island" because it comes in the
1349 /// middle of the code.
1350 ///
1351 /// This meta-instruction is a necessary part of the logic that determines
1352 /// where to place islands. Ordinarily, we want to place them between basic
1353 /// blocks, so we compute the worst-case size of each block, and emit the
1354 /// island before starting a block if we would exceed a deadline before the
1355 /// end of the block. However, some sequences (such as an inline jumptable)
1356 /// are variable-length and not accounted for by this logic; so these
1357 /// lowered sequences include an `EmitIsland` to trigger island generation
1358 /// where necessary.
1359 EmitIsland {
1360 /// The needed space before the next deadline.
1361 needed_space: CodeOffset,
1362 },
1363
1364 /// A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol in x0.
1365 ElfTlsGetAddr {
1366 symbol: ExternalName,
1367 },
1368
1369 /// A definition of a value label.
1370 ValueLabelMarker {
1371 reg: Reg,
1372 label: ValueLabel,
1373 },
1374
1375 /// An unwind pseudo-instruction.
1376 Unwind {
1377 inst: UnwindInst,
1378 },
1379 }
1380
/// Count how many of the low `num_half_words` 16-bit half-words of `value`
/// are exactly zero. Used by `Inst::load_constant` to decide whether a
/// MOVZ-based (skip 0x0000 half-words) or MOVN-based (skip 0xffff half-words)
/// sequence needs fewer instructions.
fn count_zero_half_words(value: u64, num_half_words: u8) -> usize {
    (0..num_half_words)
        .map(|i| (value >> (16 * u32::from(i))) & 0xffff)
        .filter(|&half_word| half_word == 0)
        .count()
}
1392
#[test]
fn inst_size_test() {
    // Guard against accidental growth of the `Inst` enum: since an enum is
    // as large as its biggest variant, one oversized variant bloats every
    // instruction held in a VCode buffer.
    let expected_bytes = 32;
    assert_eq!(std::mem::size_of::<Inst>(), expected_bytes);
}
1399
impl Inst {
    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
    /// logical immediate, or constant pool).
    ///
    /// Returns at most four instructions: in the worst case, each of the four 16-bit
    /// half-words of `value` needs its own MOVZ/MOVN/MOVK.
    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
            smallvec![Inst::MovZ {
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
            smallvec![Inst::MovN {
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
            // Weird logical-instruction immediate in ORI using zero register
            smallvec![Inst::AluRRImmLogic {
                alu_op: ALUOp::Orr64,
                rd,
                rn: zero_reg(),
                imml,
            }]
        } else {
            // General case: build the value half-word by half-word with a
            // MOVZ/MOVN followed by MOVKs.
            let mut insts = smallvec![];

            // If the top 32 bits are zero, use 32-bit `mov` operations.
            let (num_half_words, size, negated) = if value >> 32 == 0 {
                // `(!value << 32) >> 32` is the bitwise complement restricted to
                // the low 32 bits (the shift pair clears the upper half).
                (2, OperandSize::Size32, (!value << 32) >> 32)
            } else {
                (4, OperandSize::Size64, !value)
            };
            // If the number of 0xffff half words is greater than the number of 0x0000 half words
            // it is more efficient to use `movn` for the first instruction.
            let first_is_inverted = count_zero_half_words(negated, num_half_words)
                > count_zero_half_words(value, num_half_words);
            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
            // instruction used.
            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
            let mut first_mov_emitted = false;

            for i in 0..num_half_words {
                let imm16 = (value >> (16 * i)) & 0xffff;
                if imm16 != ignored_halfword {
                    if !first_mov_emitted {
                        first_mov_emitted = true;
                        if first_is_inverted {
                            // MOVN writes the complement of its (shifted)
                            // immediate, so invert the half-word here.
                            let imm =
                                MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
                                    .unwrap();
                            insts.push(Inst::MovN { rd, imm, size });
                        } else {
                            let imm =
                                MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
                            insts.push(Inst::MovZ { rd, imm, size });
                        }
                    } else {
                        // Remaining half-words are patched in with MOVK, which
                        // preserves the other bits of `rd`.
                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
                        insts.push(Inst::MovK { rd, imm, size });
                    }
                }
            }

            // At least one half-word must differ from the ignored pattern;
            // otherwise one of the single-instruction cases above would have
            // matched.
            assert!(first_mov_emitted);

            insts
        }
    }

    /// Create instructions that load a 128-bit constant.
    ///
    /// The value is split into 64-bit halves, each materialized independently
    /// into one of the two registers in `to_regs` (low half first).
    pub fn load_constant128(to_regs: ValueRegs<Writable<Reg>>, value: u128) -> SmallVec<[Inst; 4]> {
        assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");

        let lower = value as u64;
        let upper = (value >> 64) as u64;

        let lower_reg = to_regs.regs()[0];
        let upper_reg = to_regs.regs()[1];

        let mut load_ins = Inst::load_constant(lower_reg, lower);
        let load_upper = Inst::load_constant(upper_reg, upper);

        load_ins.extend(load_upper.into_iter());
        load_ins
    }

    /// Create instructions that load a 32-bit floating-point constant.
    ///
    /// `alloc_tmp` supplies a scratch integer register for the general case
    /// (load into a GPR, then move it to the FPU register file).
    pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        value: u32,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        // Note that we must make sure that all bits outside the lowest 32 are set to 0
        // because this function is also used to load wider constants (that have zeros
        // in their most significant bits).
        if value == 0 {
            smallvec![Inst::VecDupImm {
                rd,
                imm: ASIMDMovModImm::zero(ScalarSize::Size32),
                invert: false,
                size: VectorSize::Size32x2
            }]
        } else {
            // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
            // bits.
            let tmp = alloc_tmp(I32);
            let mut insts = Inst::load_constant(tmp, value as u64);

            // 64-bit move so that bits 32..64 of the destination are zeroed
            // (see the note at the top of this function).
            insts.push(Inst::MovToFpu {
                rd,
                rn: tmp.to_reg(),
                size: ScalarSize::Size64,
            });

            insts
        }
    }

    /// Create instructions that load a 64-bit floating-point constant.
    ///
    /// Falls back to a literal-pool load (`LoadFpuConst64`) when the value
    /// cannot be synthesized cheaply in registers.
    pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        const_data: u64,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        // Note that we must make sure that all bits outside the lowest 64 are set to 0
        // because this function is also used to load wider constants (that have zeros
        // in their most significant bits).
        if let Ok(const_data) = u32::try_from(const_data) {
            Inst::load_fp_constant32(rd, const_data, alloc_tmp)
        // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
        // bits. Also, treat it as half of a 128-bit vector and consider replicated
        // patterns. Scalar MOVI might also be an option.
        } else if const_data & (u32::MAX as u64) == 0 {
            // Only the high 32 bits are set: materialize in a GPR, then move
            // to the FP register file.
            let tmp = alloc_tmp(I64);
            let mut insts = Inst::load_constant(tmp, const_data);

            insts.push(Inst::MovToFpu {
                rd,
                rn: tmp.to_reg(),
                size: ScalarSize::Size64,
            });

            insts
        } else {
            smallvec![Inst::LoadFpuConst64 { rd, const_data }]
        }
    }

    /// Create instructions that load a 128-bit vector constant.
    ///
    /// Tries, in order: zero-extended 64-bit load, replicated-lane pattern,
    /// and finally a literal-pool load (`LoadFpuConst128`).
    pub fn load_fp_constant128<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        const_data: u128,
        alloc_tmp: F,
    ) -> SmallVec<[Inst; 5]> {
        if let Ok(const_data) = u64::try_from(const_data) {
            SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
        } else if let Some((pattern, size)) =
            Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
        {
            Inst::load_replicated_vector_pattern(
                rd,
                pattern,
                VectorSize::from_lane_size(size, true),
                alloc_tmp,
            )
        } else {
            smallvec![Inst::LoadFpuConst128 { rd, const_data }]
        }
    }

    /// Determine whether a 128-bit constant represents a vector consisting of elements with
    /// the same value.
    ///
    /// Recurses towards smaller lane sizes and returns the *smallest* lane
    /// width whose repetition reproduces `value`, together with the repeated
    /// element, or `None` if no lane size repeats.
    pub fn get_replicated_vector_pattern(
        value: u128,
        size: ScalarSize,
    ) -> Option<(u64, ScalarSize)> {
        // `next_size` of Size128 acts as a recursion terminator: the
        // recursive call will hit the catch-all arm and return None.
        let (mask, shift, next_size) = match size {
            ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
            ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
            ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
            ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
            _ => return None,
        };
        let mut r = None;
        let v = value & mask;

        // Check whether adjacent lanes are equal; if so, try to refine to an
        // even smaller lane size before settling on this one.
        if (value >> shift) & mask == v {
            r = Inst::get_replicated_vector_pattern(v, next_size);

            if r.is_none() {
                r = Some((v as u64, size));
            }
        }

        r
    }

    /// Create instructions that load a vector constant consisting of elements with
    /// the same value.
    ///
    /// Tries MOVI-style immediates (plain, inverted, and a 32-bit pattern
    /// widened to 64 bits), then FMOV vector immediates, and finally falls
    /// back to a GPR load plus DUP.
    pub fn load_replicated_vector_pattern<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        pattern: u64,
        size: VectorSize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 5]> {
        let lane_size = size.lane_size();
        // A 32-bit pattern can be duplicated into both halves of a 64-bit
        // lane, which may be encodable as a 64-bit MOVI immediate even when
        // the 32-bit form is not.
        let widen_32_bit_pattern = |pattern, lane_size| {
            if lane_size == ScalarSize::Size32 {
                let pattern = pattern as u32 as u64;

                ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64)
            } else {
                None
            }
        };

        if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
            smallvec![Inst::VecDupImm {
                rd,
                imm,
                invert: false,
                size
            }]
        } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
            // Inverted (MVNI-style) immediates exist only for 16- and 32-bit
            // lanes.
            debug_assert_ne!(lane_size, ScalarSize::Size8);
            debug_assert_ne!(lane_size, ScalarSize::Size64);

            smallvec![Inst::VecDupImm {
                rd,
                imm,
                invert: true,
                size
            }]
        } else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) {
            let mut insts = smallvec![Inst::VecDupImm {
                rd,
                imm,
                invert: false,
                size: VectorSize::Size64x2,
            }];

            // TODO: Implement support for 64-bit scalar MOVI; we zero-extend the
            // lower 64 bits instead.
            if !size.is_128bits() {
                insts.push(Inst::FpuExtend {
                    rd,
                    rn: rd.to_reg(),
                    size: ScalarSize::Size64,
                });
            }

            insts
        } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
            smallvec![Inst::VecDupFPImm { rd, imm, size }]
        } else {
            // General case: materialize the pattern in a GPR and broadcast it.
            let tmp = alloc_tmp(I64);
            let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);

            insts.push(Inst::VecDup {
                rd,
                rn: tmp.to_reg(),
                size,
            });

            insts
        }
    }

    /// Generic constructor for a load (zero-extending where appropriate).
    ///
    /// Panics (via `unimplemented!`) for non-vector types not listed below;
    /// vector types must be 64 or 128 bits wide.
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            B1 | B8 | I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
            B16 | I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            B32 | I32 | R32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            B64 | I64 | R64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            F32 => Inst::FpuLoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            F64 => Inst::FpuLoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() {
                    let bits = ty_bits(ty);
                    let rd = into_reg;

                    if bits == 128 {
                        Inst::FpuLoad128 { rd, mem, flags }
                    } else {
                        assert_eq!(bits, 64);
                        Inst::FpuLoad64 { rd, mem, flags }
                    }
                } else {
                    unimplemented!("gen_load({})", ty);
                }
            }
        }
    }

    /// Generic constructor for a store.
    ///
    /// Mirrors `gen_load`: panics for unsupported non-vector types; vector
    /// types must be 64 or 128 bits wide.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            B1 | B8 | I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
            B16 | I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
            B32 | I32 | R32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
            B64 | I64 | R64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
            },
            F32 => Inst::FpuStore32 {
                rd: from_reg,
                mem,
                flags,
            },
            F64 => Inst::FpuStore64 {
                rd: from_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() {
                    let bits = ty_bits(ty);
                    let rd = from_reg;

                    if bits == 128 {
                        Inst::FpuStore128 { rd, mem, flags }
                    } else {
                        assert_eq!(bits, 64);
                        Inst::FpuStore64 { rd, mem, flags }
                    }
                } else {
                    unimplemented!("gen_store({})", ty);
                }
            }
        }
    }

    /// Generate a LoadAddr instruction (load address of an amode into
    /// register). Elides when possible (when amode is just a register). Returns
    /// destination register: either `rd` or a register directly from the amode.
    pub fn gen_load_addr(rd: Writable<Reg>, mem: AMode) -> (Reg, Option<Inst>) {
        if let Some(r) = mem.is_reg() {
            (r, None)
        } else {
            (rd.to_reg(), Some(Inst::LoadAddr { rd, mem }))
        }
    }
}
1784
1785 //=============================================================================
1786 // Instructions: get_regs
1787
memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector)1788 fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) {
1789 match memarg {
1790 &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => {
1791 collector.add_use(reg);
1792 }
1793 &AMode::RegReg(r1, r2, ..)
1794 | &AMode::RegScaled(r1, r2, ..)
1795 | &AMode::RegScaledExtended(r1, r2, ..)
1796 | &AMode::RegExtended(r1, r2, ..) => {
1797 collector.add_use(r1);
1798 collector.add_use(r2);
1799 }
1800 &AMode::Label(..) => {}
1801 &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => {
1802 collector.add_mod(reg);
1803 }
1804 &AMode::FPOffset(..) => {
1805 collector.add_use(fp_reg());
1806 }
1807 &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {
1808 collector.add_use(stack_reg());
1809 }
1810 &AMode::RegOffset(r, ..) => {
1811 collector.add_use(r);
1812 }
1813 }
1814 }
1815
pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector)1816 fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) {
1817 match pairmemarg {
1818 &PairAMode::SignedOffset(reg, ..) => {
1819 collector.add_use(reg);
1820 }
1821 &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => {
1822 collector.add_mod(reg);
1823 }
1824 }
1825 }
1826
/// Collects the register mentions of `inst` into `collector` for the register
/// allocator: "use" = read, "def" = written with a fresh value, "mod" =
/// read-modify-write (the same physical register both in and out).
fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
    match inst {
        &Inst::AluRRR { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::AluRRRR { rd, rn, rm, ra, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
            collector.add_use(ra);
        }
        &Inst::AluRRImm12 { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::AluRRImmLogic { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::AluRRImmShift { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::AluRRRShift { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::AluRRRExtend { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::BitRR { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::ULoad8 { rd, ref mem, .. }
        | &Inst::SLoad8 { rd, ref mem, .. }
        | &Inst::ULoad16 { rd, ref mem, .. }
        | &Inst::SLoad16 { rd, ref mem, .. }
        | &Inst::ULoad32 { rd, ref mem, .. }
        | &Inst::SLoad32 { rd, ref mem, .. }
        | &Inst::ULoad64 { rd, ref mem, .. } => {
            collector.add_def(rd);
            memarg_regs(mem, collector);
        }
        // Note: for stores, `rd` is the *source* data register, hence a use.
        &Inst::Store8 { rd, ref mem, .. }
        | &Inst::Store16 { rd, ref mem, .. }
        | &Inst::Store32 { rd, ref mem, .. }
        | &Inst::Store64 { rd, ref mem, .. } => {
            collector.add_use(rd);
            memarg_regs(mem, collector);
        }
        &Inst::StoreP64 {
            rt, rt2, ref mem, ..
        } => {
            collector.add_use(rt);
            collector.add_use(rt2);
            pairmemarg_regs(mem, collector);
        }
        &Inst::LoadP64 {
            rt, rt2, ref mem, ..
        } => {
            collector.add_def(rt);
            collector.add_def(rt2);
            pairmemarg_regs(mem, collector);
        }
        &Inst::Mov64 { rd, rm } => {
            collector.add_def(rd);
            collector.add_use(rm);
        }
        &Inst::Mov32 { rd, rm } => {
            collector.add_def(rd);
            collector.add_use(rm);
        }
        &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => {
            collector.add_def(rd);
        }
        // MOVK patches one half-word and keeps the rest, so `rd` is a mod.
        &Inst::MovK { rd, .. } => {
            collector.add_mod(rd);
        }
        &Inst::CSel { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::CCmpImm { rn, .. } => {
            collector.add_use(rn);
        }
        // NOTE(review): this pseudo-instruction pins hard-coded real registers
        // (x24-x28); the set must stay in sync with the expansion emitted in
        // emit.rs -- confirm there when changing.
        &Inst::AtomicRMW { .. } => {
            collector.add_use(xreg(25));
            collector.add_use(xreg(26));
            collector.add_def(writable_xreg(24));
            collector.add_def(writable_xreg(27));
            collector.add_def(writable_xreg(28));
        }
        &Inst::AtomicCAS { rs, rt, rn, .. } => {
            collector.add_mod(rs);
            collector.add_use(rt);
            collector.add_use(rn);
        }
        // Same caveat as AtomicRMW: fixed real registers, kept in sync with
        // the emission code.
        &Inst::AtomicCASLoop { .. } => {
            collector.add_use(xreg(25));
            collector.add_use(xreg(26));
            collector.add_use(xreg(28));
            collector.add_def(writable_xreg(24));
            collector.add_def(writable_xreg(27));
        }
        &Inst::LoadAcquire { rt, rn, .. } => {
            collector.add_use(rn);
            collector.add_def(rt);
        }
        &Inst::StoreRelease { rt, rn, .. } => {
            collector.add_use(rn);
            collector.add_use(rt);
        }
        &Inst::Fence {} => {}
        &Inst::FpuMove64 { rd, rn } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuMove128 { rd, rn } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuMoveFromVec { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuExtend { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuRR { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuRRR { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::FpuRRI { fpu_op, rd, rn, .. } => {
            // SLI inserts bits into the existing destination value, so `rd`
            // is a mod for the shift-left-insert forms only.
            match fpu_op {
                FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd),
                FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd),
            }
            collector.add_use(rn);
        }
        &Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
            collector.add_use(ra);
        }
        &Inst::VecMisc { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }

        &Inst::VecLanes { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::VecShiftImm { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::VecExtract { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        // The extension form (TBX-style) keeps existing destination lanes for
        // out-of-range indices, so `rd` is a mod rather than a def.
        &Inst::VecTbl {
            rd,
            rn,
            rm,
            is_extension,
        } => {
            collector.add_use(rn);
            collector.add_use(rm);

            if is_extension {
                collector.add_mod(rd);
            } else {
                collector.add_def(rd);
            }
        }
        &Inst::VecTbl2 {
            rd,
            rn,
            rn2,
            rm,
            is_extension,
        } => {
            collector.add_use(rn);
            collector.add_use(rn2);
            collector.add_use(rm);

            if is_extension {
                collector.add_mod(rd);
            } else {
                collector.add_def(rd);
            }
        }
        &Inst::VecLoadReplicate { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::VecCSel { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::FpuLoad32 { rd, ref mem, .. } => {
            collector.add_def(rd);
            memarg_regs(mem, collector);
        }
        &Inst::FpuLoad64 { rd, ref mem, .. } => {
            collector.add_def(rd);
            memarg_regs(mem, collector);
        }
        &Inst::FpuLoad128 { rd, ref mem, .. } => {
            collector.add_def(rd);
            memarg_regs(mem, collector);
        }
        &Inst::FpuStore32 { rd, ref mem, .. } => {
            collector.add_use(rd);
            memarg_regs(mem, collector);
        }
        &Inst::FpuStore64 { rd, ref mem, .. } => {
            collector.add_use(rd);
            memarg_regs(mem, collector);
        }
        &Inst::FpuStore128 { rd, ref mem, .. } => {
            collector.add_use(rd);
            memarg_regs(mem, collector);
        }
        &Inst::FpuLoadP64 {
            rt, rt2, ref mem, ..
        } => {
            collector.add_def(rt);
            collector.add_def(rt2);
            pairmemarg_regs(mem, collector);
        }
        &Inst::FpuStoreP64 {
            rt, rt2, ref mem, ..
        } => {
            collector.add_use(rt);
            collector.add_use(rt2);
            pairmemarg_regs(mem, collector);
        }
        &Inst::FpuLoadP128 {
            rt, rt2, ref mem, ..
        } => {
            collector.add_def(rt);
            collector.add_def(rt2);
            pairmemarg_regs(mem, collector);
        }
        &Inst::FpuStoreP128 {
            rt, rt2, ref mem, ..
        } => {
            collector.add_use(rt);
            collector.add_use(rt2);
            pairmemarg_regs(mem, collector);
        }
        &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::FpuToInt { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::IntToFpu { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::FpuRound { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::MovToFpu { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        // Inserting into one lane preserves the other lanes of `rd`: mod.
        &Inst::MovToVec { rd, rn, .. } => {
            collector.add_mod(rd);
            collector.add_use(rn);
        }
        &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::VecDup { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::VecDupFromFpu { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::VecDupFPImm { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::VecDupImm { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::VecExtend { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        // Writes a single element of `rd`, leaving the rest intact: mod.
        &Inst::VecMovElement { rd, rn, .. } => {
            collector.add_mod(rd);
            collector.add_use(rn);
        }
        &Inst::VecRRLong { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        // The high-half form writes only the upper half of `rd`, keeping the
        // lower half, hence mod; the plain form fully defines `rd`.
        &Inst::VecRRNarrow {
            rd, rn, high_half, ..
        } => {
            collector.add_use(rn);

            if high_half {
                collector.add_mod(rd);
            } else {
                collector.add_def(rd);
            }
        }
        &Inst::VecRRPair { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        // UMLAL accumulates into `rd`, so it reads as well as writes it.
        &Inst::VecRRRLong {
            alu_op, rd, rn, rm, ..
        } => {
            match alu_op {
                VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
                    collector.add_mod(rd)
                }
                _ => collector.add_def(rd),
            };
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::VecRRPairLong { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        // BSL uses the destination register as the selection mask: mod.
        &Inst::VecRRR {
            alu_op, rd, rn, rm, ..
        } => {
            if alu_op == VecALUOp::Bsl {
                collector.add_mod(rd);
            } else {
                collector.add_def(rd);
            }
            collector.add_use(rn);
            collector.add_use(rm);
        }
        &Inst::MovToNZCV { rn } => {
            collector.add_use(rn);
        }
        &Inst::MovFromNZCV { rd } => {
            collector.add_def(rd);
        }
        &Inst::Extend { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {}
        // Calls carry their ABI-determined use/def sets in the boxed info.
        &Inst::Call { ref info, .. } => {
            collector.add_uses(&*info.uses);
            collector.add_defs(&*info.defs);
        }
        &Inst::CallInd { ref info, .. } => {
            collector.add_uses(&*info.uses);
            collector.add_defs(&*info.defs);
            collector.add_use(info.rn);
        }
        // Only CBZ/CBNZ-style branches read a register; flag-based ones don't.
        &Inst::CondBr { ref kind, .. } => match kind {
            CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
                collector.add_use(*rt);
            }
            CondBrKind::Cond(_) => {}
        },
        &Inst::IndirectBr { rn, .. } => {
            collector.add_use(rn);
        }
        &Inst::Nop0 | Inst::Nop4 => {}
        &Inst::Brk => {}
        &Inst::Udf { .. } => {}
        &Inst::TrapIf { ref kind, .. } => match kind {
            CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
                collector.add_use(*rt);
            }
            CondBrKind::Cond(_) => {}
        },
        &Inst::Adr { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::Word4 { .. } | &Inst::Word8 { .. } => {}
        &Inst::JTSequence {
            ridx, rtmp1, rtmp2, ..
        } => {
            collector.add_use(ridx);
            collector.add_def(rtmp1);
            collector.add_def(rtmp2);
        }
        &Inst::LoadExtName { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::LoadAddr { rd, ref mem } => {
            collector.add_def(rd);
            memarg_regs(mem, collector);
        }
        &Inst::VirtualSPOffsetAdj { .. } => {}
        &Inst::ValueLabelMarker { reg, .. } => {
            collector.add_use(reg);
        }

        // The libcall clobbers everything the SysV call convention allows.
        &Inst::ElfTlsGetAddr { .. } => {
            for reg in AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV) {
                collector.add_def(reg);
            }
        }
        &Inst::Unwind { .. } => {}
        &Inst::EmitIsland { .. } => {}
    }
}
2273
2274 //=============================================================================
2275 // Instructions: map_regs
2276
/// Rewrite every virtual register mentioned by `inst` into the real register
/// assigned by the register allocator, as reported by `mapper`.
///
/// Each operand is rewritten through the helper matching its role — `map_use`
/// (read), `map_def` (written), or `map_mod` (read and written). The role
/// chosen per operand here must agree with what is reported to the
/// register-usage collector for the same instruction; a mismatch would make
/// the allocator's pre-/post-maps disagree with this rewrite.
fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
    // Rewrite a register that is only read: consult the "use" (pre) map.
    // Real registers are left untouched.
    fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
        if r.is_virtual() {
            let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
            *r = new;
        }
    }

    // Rewrite a register that is only written: consult the "def" (post) map.
    fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
        if r.to_reg().is_virtual() {
            let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
            *r = Writable::from_reg(new);
        }
    }

    // Rewrite a register that is both read and written: consult the "mod" map.
    fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
        if r.to_reg().is_virtual() {
            let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
            *r = Writable::from_reg(new);
        }
    }

    // Rewrite all registers embedded in a (single-register-data) amode.
    fn map_mem<RUM: RegUsageMapper>(m: &RUM, mem: &mut AMode) {
        // N.B.: we take only the pre-map here, but this is OK because the
        // only addressing modes that update registers (pre/post-increment on
        // AArch64) both read and write registers, so they are "mods" rather
        // than "defs", so must be the same in both the pre- and post-map.
        match mem {
            &mut AMode::Unscaled(ref mut reg, ..) => map_use(m, reg),
            &mut AMode::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
            &mut AMode::RegReg(ref mut r1, ref mut r2)
            | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..)
            | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..)
            | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => {
                map_use(m, r1);
                map_use(m, r2);
            }
            &mut AMode::Label(..) => {}
            &mut AMode::PreIndexed(ref mut r, ..) => map_mod(m, r),
            &mut AMode::PostIndexed(ref mut r, ..) => map_mod(m, r),
            // These are relative to fixed/synthetic base registers, so there
            // is no virtual register to rewrite.
            &mut AMode::FPOffset(..)
            | &mut AMode::SPOffset(..)
            | &mut AMode::NominalSPOffset(..) => {}
            &mut AMode::RegOffset(ref mut r, ..) => map_use(m, r),
        };
    }

    // Rewrite the base register of a load/store-pair amode.
    fn map_pairmem<RUM: RegUsageMapper>(m: &RUM, mem: &mut PairAMode) {
        match mem {
            &mut PairAMode::SignedOffset(ref mut reg, ..) => map_use(m, reg),
            &mut PairAMode::PreIndexed(ref mut reg, ..) => map_def(m, reg),
            &mut PairAMode::PostIndexed(ref mut reg, ..) => map_def(m, reg),
        }
    }

    // Rewrite the tested register (if any) of a conditional-branch kind.
    fn map_br<RUM: RegUsageMapper>(m: &RUM, br: &mut CondBrKind) {
        match br {
            &mut CondBrKind::Zero(ref mut reg) => map_use(m, reg),
            &mut CondBrKind::NotZero(ref mut reg) => map_use(m, reg),
            &mut CondBrKind::Cond(..) => {}
        };
    }

    // Exhaustive dispatch over all instruction formats. Loads map `rd` as a
    // def; stores map `rd` as a use; a few vector forms conditionally treat
    // the destination as a "mod" when the operation also reads it.
    match inst {
        &mut Inst::AluRRR {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::AluRRRR {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ref mut ra,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
            map_use(mapper, ra);
        }
        &mut Inst::AluRRImm12 {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::AluRRImmLogic {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::AluRRImmShift {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::AluRRRShift {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::AluRRRExtend {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::BitRR {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::ULoad8 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::SLoad8 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::ULoad16 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::SLoad16 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::ULoad32 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::SLoad32 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }

        &mut Inst::ULoad64 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        // Stores: `rd` is the value being stored, so it is a use, not a def.
        &mut Inst::Store8 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::Store16 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::Store32 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::Store64 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rd);
            map_mem(mapper, mem);
        }

        &mut Inst::StoreP64 {
            ref mut rt,
            ref mut rt2,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rt);
            map_use(mapper, rt2);
            map_pairmem(mapper, mem);
        }
        &mut Inst::LoadP64 {
            ref mut rt,
            ref mut rt2,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rt);
            map_def(mapper, rt2);
            map_pairmem(mapper, mem);
        }
        &mut Inst::Mov64 {
            ref mut rd,
            ref mut rm,
        } => {
            map_def(mapper, rd);
            map_use(mapper, rm);
        }
        &mut Inst::Mov32 {
            ref mut rd,
            ref mut rm,
        } => {
            map_def(mapper, rd);
            map_use(mapper, rm);
        }
        &mut Inst::MovZ { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::MovN { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        // Note: MovK reads its destination at emit time, but its register
        // role here matches what the usage collector reports.
        &mut Inst::MovK { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::CSel {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::CSet { ref mut rd, .. } | &mut Inst::CSetm { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::CCmpImm { ref mut rn, .. } => {
            map_use(mapper, rn);
        }
        &mut Inst::AtomicRMW { .. } => {
            // There are no vregs to map in this insn.
        }
        &mut Inst::AtomicCAS {
            ref mut rs,
            ref mut rt,
            ref mut rn,
            ..
        } => {
            map_mod(mapper, rs);
            map_use(mapper, rt);
            map_use(mapper, rn);
        }
        &mut Inst::AtomicCASLoop { .. } => {
            // There are no vregs to map in this insn.
        }
        &mut Inst::LoadAcquire {
            ref mut rt,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rt);
            map_use(mapper, rn);
        }
        &mut Inst::StoreRelease {
            ref mut rt,
            ref mut rn,
            ..
        } => {
            map_use(mapper, rt);
            map_use(mapper, rn);
        }
        &mut Inst::Fence {} => {}
        &mut Inst::FpuMove64 {
            ref mut rd,
            ref mut rn,
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuMove128 {
            ref mut rd,
            ref mut rn,
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuMoveFromVec {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuExtend {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuRR {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuRRR {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::FpuRRI {
            fpu_op,
            ref mut rd,
            ref mut rn,
            ..
        } => {
            // SLI reads and writes its destination (it inserts bits), so the
            // destination is a "mod" for those opcodes only.
            match fpu_op {
                FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => map_def(mapper, rd),
                FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => map_mod(mapper, rd),
            }
            map_use(mapper, rn);
        }
        &mut Inst::FpuRRRR {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ref mut ra,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
            map_use(mapper, ra);
        }
        &mut Inst::VecMisc {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecLanes {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecShiftImm {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecExtract {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::VecTbl {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            is_extension,
        } => {
            map_use(mapper, rn);
            map_use(mapper, rm);

            // TBX (extension) leaves out-of-range lanes of `rd` unchanged,
            // so the destination is read as well as written.
            if is_extension {
                map_mod(mapper, rd);
            } else {
                map_def(mapper, rd);
            }
        }
        &mut Inst::VecTbl2 {
            ref mut rd,
            ref mut rn,
            ref mut rn2,
            ref mut rm,
            is_extension,
        } => {
            map_use(mapper, rn);
            map_use(mapper, rn2);
            map_use(mapper, rm);

            if is_extension {
                map_mod(mapper, rd);
            } else {
                map_def(mapper, rd);
            }
        }
        &mut Inst::VecLoadReplicate {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecCSel {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::FpuCmp32 {
            ref mut rn,
            ref mut rm,
        } => {
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::FpuCmp64 {
            ref mut rn,
            ref mut rm,
        } => {
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::FpuLoad32 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::FpuLoad64 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::FpuLoad128 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::FpuStore32 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::FpuStore64 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::FpuStore128 {
            ref mut rd,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::FpuLoadP64 {
            ref mut rt,
            ref mut rt2,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rt);
            map_def(mapper, rt2);
            map_pairmem(mapper, mem);
        }
        &mut Inst::FpuStoreP64 {
            ref mut rt,
            ref mut rt2,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rt);
            map_use(mapper, rt2);
            map_pairmem(mapper, mem);
        }
        &mut Inst::FpuLoadP128 {
            ref mut rt,
            ref mut rt2,
            ref mut mem,
            ..
        } => {
            map_def(mapper, rt);
            map_def(mapper, rt2);
            map_pairmem(mapper, mem);
        }
        &mut Inst::FpuStoreP128 {
            ref mut rt,
            ref mut rt2,
            ref mut mem,
            ..
        } => {
            map_use(mapper, rt);
            map_use(mapper, rt2);
            map_pairmem(mapper, mem);
        }
        &mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::LoadFpuConst128 { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::FpuToInt {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::IntToFpu {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuCSel32 {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::FpuCSel64 {
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::FpuRound {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::MovToFpu {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        // Inserting into a lane preserves the other lanes, so the vector
        // destination is a "mod".
        &mut Inst::MovToVec {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_mod(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::MovFromVec {
            ref mut rd,
            ref mut rn,
            ..
        }
        | &mut Inst::MovFromVecSigned {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecDup {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecDupFromFpu {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecDupFPImm { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::VecDupImm { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::VecExtend {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecMovElement {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_mod(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecRRLong {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecRRNarrow {
            ref mut rd,
            ref mut rn,
            high_half,
            ..
        } => {
            map_use(mapper, rn);

            // Narrowing into the high half keeps the low half of `rd`
            // intact, so the destination is then read as well as written.
            if high_half {
                map_mod(mapper, rd);
            } else {
                map_def(mapper, rd);
            }
        }
        &mut Inst::VecRRPair {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecRRRLong {
            alu_op,
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            // UMLAL accumulates into the destination, which makes it a "mod".
            match alu_op {
                VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
                    map_mod(mapper, rd)
                }
                _ => map_def(mapper, rd),
            };
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::VecRRPairLong {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecRRR {
            alu_op,
            ref mut rd,
            ref mut rn,
            ref mut rm,
            ..
        } => {
            // BSL uses the destination as the selection mask, so it is read.
            if alu_op == VecALUOp::Bsl {
                map_mod(mapper, rd);
            } else {
                map_def(mapper, rd);
            }
            map_use(mapper, rn);
            map_use(mapper, rm);
        }
        &mut Inst::MovToNZCV { ref mut rn } => {
            map_use(mapper, rn);
        }
        &mut Inst::MovFromNZCV { ref mut rd } => {
            map_def(mapper, rd);
        }
        &mut Inst::Extend {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::Jump { .. } => {}
        // Calls: the ABI-computed use/def lists carry the argument and
        // return-value registers.
        &mut Inst::Call { ref mut info } => {
            for r in info.uses.iter_mut() {
                map_use(mapper, r);
            }
            for r in info.defs.iter_mut() {
                map_def(mapper, r);
            }
        }
        &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
        &mut Inst::CallInd { ref mut info, .. } => {
            for r in info.uses.iter_mut() {
                map_use(mapper, r);
            }
            for r in info.defs.iter_mut() {
                map_def(mapper, r);
            }
            // The indirect-call target register is an ordinary use.
            map_use(mapper, &mut info.rn);
        }
        &mut Inst::CondBr { ref mut kind, .. } => {
            map_br(mapper, kind);
        }
        &mut Inst::IndirectBr { ref mut rn, .. } => {
            map_use(mapper, rn);
        }
        &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {}
        &mut Inst::TrapIf { ref mut kind, .. } => {
            map_br(mapper, kind);
        }
        &mut Inst::Adr { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {}
        &mut Inst::JTSequence {
            ref mut ridx,
            ref mut rtmp1,
            ref mut rtmp2,
            ..
        } => {
            map_use(mapper, ridx);
            map_def(mapper, rtmp1);
            map_def(mapper, rtmp2);
        }
        &mut Inst::LoadExtName { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::LoadAddr {
            ref mut rd,
            ref mut mem,
        } => {
            map_def(mapper, rd);
            map_mem(mapper, mem);
        }
        &mut Inst::VirtualSPOffsetAdj { .. } => {}
        &mut Inst::EmitIsland { .. } => {}
        &mut Inst::ElfTlsGetAddr { .. } => {}
        &mut Inst::ValueLabelMarker { ref mut reg, .. } => {
            map_use(mapper, reg);
        }
        &mut Inst::Unwind { .. } => {}
    }
}
3117
3118 //=============================================================================
3119 // Instructions: misc functions and external interface
3120
3121 impl MachInst for Inst {
3122 type LabelUse = LabelUse;
3123
get_regs(&self, collector: &mut RegUsageCollector)3124 fn get_regs(&self, collector: &mut RegUsageCollector) {
3125 aarch64_get_regs(self, collector)
3126 }
3127
map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM)3128 fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
3129 aarch64_map_regs(self, mapper);
3130 }
3131
is_move(&self) -> Option<(Writable<Reg>, Reg)>3132 fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
3133 match self {
3134 &Inst::Mov64 { rd, rm } => Some((rd, rm)),
3135 &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
3136 &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
3137 _ => None,
3138 }
3139 }
3140
is_epilogue_placeholder(&self) -> bool3141 fn is_epilogue_placeholder(&self) -> bool {
3142 if let Inst::EpiloguePlaceholder = self {
3143 true
3144 } else {
3145 false
3146 }
3147 }
3148
is_included_in_clobbers(&self) -> bool3149 fn is_included_in_clobbers(&self) -> bool {
3150 // We exclude call instructions from the clobber-set when they are calls
3151 // from caller to callee with the same ABI. Such calls cannot possibly
3152 // force any new registers to be saved in the prologue, because anything
3153 // that the callee clobbers, the caller is also allowed to clobber. This
3154 // both saves work and enables us to more precisely follow the
3155 // half-caller-save, half-callee-save SysV ABI for some vector
3156 // registers.
3157 //
3158 // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
3159 // more information on this ABI-implementation hack.
3160 match self {
3161 &Inst::Call { ref info } => info.caller_callconv != info.callee_callconv,
3162 &Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv,
3163 _ => true,
3164 }
3165 }
3166
is_term<'a>(&'a self) -> MachTerminator<'a>3167 fn is_term<'a>(&'a self) -> MachTerminator<'a> {
3168 match self {
3169 &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
3170 &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
3171 &Inst::CondBr {
3172 taken, not_taken, ..
3173 } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
3174 &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]),
3175 &Inst::JTSequence { ref info, .. } => {
3176 MachTerminator::Indirect(&info.targets_for_term[..])
3177 }
3178 _ => MachTerminator::None,
3179 }
3180 }
3181
gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst3182 fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
3183 let bits = ty.bits();
3184
3185 assert!(bits <= 128);
3186 assert!(to_reg.to_reg().get_class() == from_reg.get_class());
3187
3188 if from_reg.get_class() == RegClass::I64 {
3189 Inst::Mov64 {
3190 rd: to_reg,
3191 rm: from_reg,
3192 }
3193 } else if from_reg.get_class() == RegClass::V128 {
3194 if bits > 64 {
3195 Inst::FpuMove128 {
3196 rd: to_reg,
3197 rn: from_reg,
3198 }
3199 } else {
3200 Inst::FpuMove64 {
3201 rd: to_reg,
3202 rn: from_reg,
3203 }
3204 }
3205 } else {
3206 panic!("Unexpected register class: {:?}", from_reg.get_class());
3207 }
3208 }
3209
gen_constant<F: FnMut(Type) -> Writable<Reg>>( to_regs: ValueRegs<Writable<Reg>>, value: u128, ty: Type, alloc_tmp: F, ) -> SmallVec<[Inst; 4]>3210 fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
3211 to_regs: ValueRegs<Writable<Reg>>,
3212 value: u128,
3213 ty: Type,
3214 alloc_tmp: F,
3215 ) -> SmallVec<[Inst; 4]> {
3216 let to_reg = to_regs.only_reg();
3217 match ty {
3218 F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
3219 F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
3220 B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
3221 Inst::load_constant(to_reg.unwrap(), value as u64)
3222 }
3223 I128 => Inst::load_constant128(to_regs, value),
3224 _ => panic!("Cannot generate constant for type: {}", ty),
3225 }
3226 }
3227
gen_nop(preferred_size: usize) -> Inst3228 fn gen_nop(preferred_size: usize) -> Inst {
3229 if preferred_size == 0 {
3230 return Inst::Nop0;
3231 }
3232 // We can't give a NOP (or any insn) < 4 bytes.
3233 assert!(preferred_size >= 4);
3234 Inst::Nop4
3235 }
3236
maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst>3237 fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
3238 None
3239 }
3240
rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>3241 fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
3242 match ty {
3243 I8 => Ok((&[RegClass::I64], &[I8])),
3244 I16 => Ok((&[RegClass::I64], &[I16])),
3245 I32 => Ok((&[RegClass::I64], &[I32])),
3246 I64 => Ok((&[RegClass::I64], &[I64])),
3247 B1 => Ok((&[RegClass::I64], &[B1])),
3248 B8 => Ok((&[RegClass::I64], &[B8])),
3249 B16 => Ok((&[RegClass::I64], &[B16])),
3250 B32 => Ok((&[RegClass::I64], &[B32])),
3251 B64 => Ok((&[RegClass::I64], &[B64])),
3252 R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
3253 R64 => Ok((&[RegClass::I64], &[R64])),
3254 F32 => Ok((&[RegClass::V128], &[F32])),
3255 F64 => Ok((&[RegClass::V128], &[F64])),
3256 I128 => Ok((&[RegClass::I64, RegClass::I64], &[I64, I64])),
3257 B128 => Ok((&[RegClass::I64, RegClass::I64], &[B64, B64])),
3258 _ if ty.is_vector() => {
3259 assert!(ty.bits() <= 128);
3260 Ok((&[RegClass::V128], &[I8X16]))
3261 }
3262 IFLAGS | FFLAGS => Ok((&[RegClass::I64], &[I64])),
3263 _ => Err(CodegenError::Unsupported(format!(
3264 "Unexpected SSA-value type: {}",
3265 ty
3266 ))),
3267 }
3268 }
3269
gen_jump(target: MachLabel) -> Inst3270 fn gen_jump(target: MachLabel) -> Inst {
3271 Inst::Jump {
3272 dest: BranchTarget::Label(target),
3273 }
3274 }
3275
reg_universe(flags: &settings::Flags) -> RealRegUniverse3276 fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
3277 create_reg_universe(flags)
3278 }
3279
worst_case_size() -> CodeOffset3280 fn worst_case_size() -> CodeOffset {
3281 // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
3282 // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
3283 // 64-bit f64 constants.
3284 //
3285 // Note that inline jump-tables handle island/pool insertion separately, so we do not need
3286 // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
3287 // feasible for other reasons).
3288 44
3289 }
3290
ref_type_regclass(_: &settings::Flags) -> RegClass3291 fn ref_type_regclass(_: &settings::Flags) -> RegClass {
3292 RegClass::I64
3293 }
3294
gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self3295 fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self {
3296 Inst::ValueLabelMarker { label, reg }
3297 }
3298
defines_value_label(&self) -> Option<(ValueLabel, Reg)>3299 fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
3300 match self {
3301 Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)),
3302 _ => None,
3303 }
3304 }
3305 }
3306
3307 //=============================================================================
3308 // Pretty-printing of instructions.
3309
mem_finalize_for_show( mem: &AMode, mb_rru: Option<&RealRegUniverse>, state: &EmitState, ) -> (String, AMode)3310 fn mem_finalize_for_show(
3311 mem: &AMode,
3312 mb_rru: Option<&RealRegUniverse>,
3313 state: &EmitState,
3314 ) -> (String, AMode) {
3315 let (mem_insts, mem) = mem_finalize(0, mem, state);
3316 let mut mem_str = mem_insts
3317 .into_iter()
3318 .map(|inst| inst.show_rru(mb_rru))
3319 .collect::<Vec<_>>()
3320 .join(" ; ");
3321 if !mem_str.is_empty() {
3322 mem_str += " ; ";
3323 }
3324
3325 (mem_str, mem)
3326 }
3327
impl PrettyPrint for Inst {
    /// Render this instruction for display, delegating to `pretty_print`
    /// with a fresh (default) emission state.
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        self.pretty_print(mb_rru, &mut EmitState::default())
    }
}
3333
3334 impl Inst {
print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String3335 fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
3336 fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) {
3337 match alu_op {
3338 ALUOp::Add32 => ("add", OperandSize::Size32),
3339 ALUOp::Add64 => ("add", OperandSize::Size64),
3340 ALUOp::Sub32 => ("sub", OperandSize::Size32),
3341 ALUOp::Sub64 => ("sub", OperandSize::Size64),
3342 ALUOp::Orr32 => ("orr", OperandSize::Size32),
3343 ALUOp::Orr64 => ("orr", OperandSize::Size64),
3344 ALUOp::And32 => ("and", OperandSize::Size32),
3345 ALUOp::And64 => ("and", OperandSize::Size64),
3346 ALUOp::AndS32 => ("ands", OperandSize::Size32),
3347 ALUOp::AndS64 => ("ands", OperandSize::Size64),
3348 ALUOp::Eor32 => ("eor", OperandSize::Size32),
3349 ALUOp::Eor64 => ("eor", OperandSize::Size64),
3350 ALUOp::AddS32 => ("adds", OperandSize::Size32),
3351 ALUOp::AddS64 => ("adds", OperandSize::Size64),
3352 ALUOp::SubS32 => ("subs", OperandSize::Size32),
3353 ALUOp::SubS64 => ("subs", OperandSize::Size64),
3354 ALUOp::SMulH => ("smulh", OperandSize::Size64),
3355 ALUOp::UMulH => ("umulh", OperandSize::Size64),
3356 ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
3357 ALUOp::UDiv64 => ("udiv", OperandSize::Size64),
3358 ALUOp::AndNot32 => ("bic", OperandSize::Size32),
3359 ALUOp::AndNot64 => ("bic", OperandSize::Size64),
3360 ALUOp::OrrNot32 => ("orn", OperandSize::Size32),
3361 ALUOp::OrrNot64 => ("orn", OperandSize::Size64),
3362 ALUOp::EorNot32 => ("eon", OperandSize::Size32),
3363 ALUOp::EorNot64 => ("eon", OperandSize::Size64),
3364 ALUOp::RotR32 => ("ror", OperandSize::Size32),
3365 ALUOp::RotR64 => ("ror", OperandSize::Size64),
3366 ALUOp::Lsr32 => ("lsr", OperandSize::Size32),
3367 ALUOp::Lsr64 => ("lsr", OperandSize::Size64),
3368 ALUOp::Asr32 => ("asr", OperandSize::Size32),
3369 ALUOp::Asr64 => ("asr", OperandSize::Size64),
3370 ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
3371 ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
3372 ALUOp::Adc32 => ("adc", OperandSize::Size32),
3373 ALUOp::Adc64 => ("adc", OperandSize::Size64),
3374 ALUOp::AdcS32 => ("adcs", OperandSize::Size32),
3375 ALUOp::AdcS64 => ("adcs", OperandSize::Size64),
3376 ALUOp::Sbc32 => ("sbc", OperandSize::Size32),
3377 ALUOp::Sbc64 => ("sbc", OperandSize::Size64),
3378 ALUOp::SbcS32 => ("sbcs", OperandSize::Size32),
3379 ALUOp::SbcS64 => ("sbcs", OperandSize::Size64),
3380 }
3381 }
3382
3383 match self {
3384 &Inst::Nop0 => "nop-zero-len".to_string(),
3385 &Inst::Nop4 => "nop".to_string(),
3386 &Inst::AluRRR { alu_op, rd, rn, rm } => {
3387 let (op, size) = op_name_size(alu_op);
3388 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3389 let rn = show_ireg_sized(rn, mb_rru, size);
3390 let rm = show_ireg_sized(rm, mb_rru, size);
3391 format!("{} {}, {}, {}", op, rd, rn, rm)
3392 }
3393 &Inst::AluRRRR {
3394 alu_op,
3395 rd,
3396 rn,
3397 rm,
3398 ra,
3399 } => {
3400 let (op, size) = match alu_op {
3401 ALUOp3::MAdd32 => ("madd", OperandSize::Size32),
3402 ALUOp3::MAdd64 => ("madd", OperandSize::Size64),
3403 ALUOp3::MSub32 => ("msub", OperandSize::Size32),
3404 ALUOp3::MSub64 => ("msub", OperandSize::Size64),
3405 };
3406 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3407 let rn = show_ireg_sized(rn, mb_rru, size);
3408 let rm = show_ireg_sized(rm, mb_rru, size);
3409 let ra = show_ireg_sized(ra, mb_rru, size);
3410
3411 format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
3412 }
3413 &Inst::AluRRImm12 {
3414 alu_op,
3415 rd,
3416 rn,
3417 ref imm12,
3418 } => {
3419 let (op, size) = op_name_size(alu_op);
3420 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3421 let rn = show_ireg_sized(rn, mb_rru, size);
3422
3423 if imm12.bits == 0 && alu_op == ALUOp::Add64 {
3424 // special-case MOV (used for moving into SP).
3425 format!("mov {}, {}", rd, rn)
3426 } else {
3427 let imm12 = imm12.show_rru(mb_rru);
3428 format!("{} {}, {}, {}", op, rd, rn, imm12)
3429 }
3430 }
3431 &Inst::AluRRImmLogic {
3432 alu_op,
3433 rd,
3434 rn,
3435 ref imml,
3436 } => {
3437 let (op, size) = op_name_size(alu_op);
3438 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3439 let rn = show_ireg_sized(rn, mb_rru, size);
3440 let imml = imml.show_rru(mb_rru);
3441 format!("{} {}, {}, {}", op, rd, rn, imml)
3442 }
3443 &Inst::AluRRImmShift {
3444 alu_op,
3445 rd,
3446 rn,
3447 ref immshift,
3448 } => {
3449 let (op, size) = op_name_size(alu_op);
3450 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3451 let rn = show_ireg_sized(rn, mb_rru, size);
3452 let immshift = immshift.show_rru(mb_rru);
3453 format!("{} {}, {}, {}", op, rd, rn, immshift)
3454 }
3455 &Inst::AluRRRShift {
3456 alu_op,
3457 rd,
3458 rn,
3459 rm,
3460 ref shiftop,
3461 } => {
3462 let (op, size) = op_name_size(alu_op);
3463 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3464 let rn = show_ireg_sized(rn, mb_rru, size);
3465 let rm = show_ireg_sized(rm, mb_rru, size);
3466 let shiftop = shiftop.show_rru(mb_rru);
3467 format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop)
3468 }
3469 &Inst::AluRRRExtend {
3470 alu_op,
3471 rd,
3472 rn,
3473 rm,
3474 ref extendop,
3475 } => {
3476 let (op, size) = op_name_size(alu_op);
3477 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3478 let rn = show_ireg_sized(rn, mb_rru, size);
3479 let rm = show_ireg_sized(rm, mb_rru, size);
3480 let extendop = extendop.show_rru(mb_rru);
3481 format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop)
3482 }
3483 &Inst::BitRR { op, rd, rn } => {
3484 let size = op.operand_size();
3485 let op = op.op_str();
3486 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3487 let rn = show_ireg_sized(rn, mb_rru, size);
3488 format!("{} {}, {}", op, rd, rn)
3489 }
3490 &Inst::ULoad8 { rd, ref mem, .. }
3491 | &Inst::SLoad8 { rd, ref mem, .. }
3492 | &Inst::ULoad16 { rd, ref mem, .. }
3493 | &Inst::SLoad16 { rd, ref mem, .. }
3494 | &Inst::ULoad32 { rd, ref mem, .. }
3495 | &Inst::SLoad32 { rd, ref mem, .. }
3496 | &Inst::ULoad64 { rd, ref mem, .. } => {
3497 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3498
3499 let is_unscaled = match &mem {
3500 &AMode::Unscaled(..) => true,
3501 _ => false,
3502 };
3503 let (op, size) = match (self, is_unscaled) {
3504 (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
3505 (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
3506 (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
3507 (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
3508 (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
3509 (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
3510 (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
3511 (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
3512 (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
3513 (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
3514 (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
3515 (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
3516 (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
3517 (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
3518 _ => unreachable!(),
3519 };
3520 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3521 let mem = mem.show_rru(mb_rru);
3522 format!("{}{} {}, {}", mem_str, op, rd, mem)
3523 }
3524 &Inst::Store8 { rd, ref mem, .. }
3525 | &Inst::Store16 { rd, ref mem, .. }
3526 | &Inst::Store32 { rd, ref mem, .. }
3527 | &Inst::Store64 { rd, ref mem, .. } => {
3528 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3529
3530 let is_unscaled = match &mem {
3531 &AMode::Unscaled(..) => true,
3532 _ => false,
3533 };
3534 let (op, size) = match (self, is_unscaled) {
3535 (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
3536 (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
3537 (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
3538 (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
3539 (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
3540 (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
3541 (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
3542 (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
3543 _ => unreachable!(),
3544 };
3545 let rd = show_ireg_sized(rd, mb_rru, size);
3546 let mem = mem.show_rru(mb_rru);
3547 format!("{}{} {}, {}", mem_str, op, rd, mem)
3548 }
3549 &Inst::StoreP64 {
3550 rt, rt2, ref mem, ..
3551 } => {
3552 let rt = rt.show_rru(mb_rru);
3553 let rt2 = rt2.show_rru(mb_rru);
3554 let mem = mem.show_rru(mb_rru);
3555 format!("stp {}, {}, {}", rt, rt2, mem)
3556 }
3557 &Inst::LoadP64 {
3558 rt, rt2, ref mem, ..
3559 } => {
3560 let rt = rt.to_reg().show_rru(mb_rru);
3561 let rt2 = rt2.to_reg().show_rru(mb_rru);
3562 let mem = mem.show_rru(mb_rru);
3563 format!("ldp {}, {}, {}", rt, rt2, mem)
3564 }
3565 &Inst::Mov64 { rd, rm } => {
3566 let rd = rd.to_reg().show_rru(mb_rru);
3567 let rm = rm.show_rru(mb_rru);
3568 format!("mov {}, {}", rd, rm)
3569 }
3570 &Inst::Mov32 { rd, rm } => {
3571 let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
3572 let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32);
3573 format!("mov {}, {}", rd, rm)
3574 }
3575 &Inst::MovZ { rd, ref imm, size } => {
3576 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3577 let imm = imm.show_rru(mb_rru);
3578 format!("movz {}, {}", rd, imm)
3579 }
3580 &Inst::MovN { rd, ref imm, size } => {
3581 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3582 let imm = imm.show_rru(mb_rru);
3583 format!("movn {}, {}", rd, imm)
3584 }
3585 &Inst::MovK { rd, ref imm, size } => {
3586 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
3587 let imm = imm.show_rru(mb_rru);
3588 format!("movk {}, {}", rd, imm)
3589 }
3590 &Inst::CSel { rd, rn, rm, cond } => {
3591 let rd = rd.to_reg().show_rru(mb_rru);
3592 let rn = rn.show_rru(mb_rru);
3593 let rm = rm.show_rru(mb_rru);
3594 let cond = cond.show_rru(mb_rru);
3595 format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
3596 }
3597 &Inst::CSet { rd, cond } => {
3598 let rd = rd.to_reg().show_rru(mb_rru);
3599 let cond = cond.show_rru(mb_rru);
3600 format!("cset {}, {}", rd, cond)
3601 }
3602 &Inst::CSetm { rd, cond } => {
3603 let rd = rd.to_reg().show_rru(mb_rru);
3604 let cond = cond.show_rru(mb_rru);
3605 format!("csetm {}, {}", rd, cond)
3606 }
3607 &Inst::CCmpImm {
3608 size,
3609 rn,
3610 imm,
3611 nzcv,
3612 cond,
3613 } => {
3614 let rn = show_ireg_sized(rn, mb_rru, size);
3615 let imm = imm.show_rru(mb_rru);
3616 let nzcv = nzcv.show_rru(mb_rru);
3617 let cond = cond.show_rru(mb_rru);
3618 format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
3619 }
3620 &Inst::AtomicRMW { ty, op, .. } => {
3621 format!(
3622 "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
3623 ty.bits(), op)
3624 }
3625 &Inst::AtomicCAS { rs, rt, rn, ty } => {
3626 let op = match ty {
3627 I8 => "casalb",
3628 I16 => "casalh",
3629 I32 | I64 => "casal",
3630 _ => panic!("Unsupported type: {}", ty),
3631 };
3632 let size = OperandSize::from_ty(ty);
3633 let rs = show_ireg_sized(rs.to_reg(), mb_rru, size);
3634 let rt = show_ireg_sized(rt, mb_rru, size);
3635 let rn = rn.show_rru(mb_rru);
3636
3637 format!("{} {}, {}, [{}]", op, rs, rt, rn)
3638 }
3639 &Inst::AtomicCASLoop { ty } => {
3640 format!(
3641 "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
3642 ty.bits())
3643 }
3644 &Inst::LoadAcquire {
3645 access_ty, rt, rn, ..
3646 } => {
3647 let (op, ty) = match access_ty {
3648 I8 => ("ldarb", I32),
3649 I16 => ("ldarh", I32),
3650 I32 => ("ldar", I32),
3651 I64 => ("ldar", I64),
3652 _ => panic!("Unsupported type: {}", access_ty),
3653 };
3654 let size = OperandSize::from_ty(ty);
3655 let rt = show_ireg_sized(rt.to_reg(), mb_rru, size);
3656 let rn = rn.show_rru(mb_rru);
3657 format!("{} {}, [{}]", op, rt, rn)
3658 }
3659 &Inst::StoreRelease {
3660 access_ty, rt, rn, ..
3661 } => {
3662 let (op, ty) = match access_ty {
3663 I8 => ("stlrb", I32),
3664 I16 => ("stlrh", I32),
3665 I32 => ("stlr", I32),
3666 I64 => ("stlr", I64),
3667 _ => panic!("Unsupported type: {}", access_ty),
3668 };
3669 let size = OperandSize::from_ty(ty);
3670 let rt = show_ireg_sized(rt, mb_rru, size);
3671 let rn = rn.show_rru(mb_rru);
3672 format!("{} {}, [{}]", op, rt, rn)
3673 }
3674 &Inst::Fence {} => {
3675 format!("dmb ish")
3676 }
3677 &Inst::FpuMove64 { rd, rn } => {
3678 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
3679 let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
3680 format!("fmov {}, {}", rd, rn)
3681 }
3682 &Inst::FpuMove128 { rd, rn } => {
3683 let rd = rd.to_reg().show_rru(mb_rru);
3684 let rn = rn.show_rru(mb_rru);
3685 format!("mov {}.16b, {}.16b", rd, rn)
3686 }
3687 &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
3688 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
3689 let rn = show_vreg_element(rn, mb_rru, idx, size);
3690 format!("mov {}, {}", rd, rn)
3691 }
3692 &Inst::FpuExtend { rd, rn, size } => {
3693 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
3694 let rn = show_vreg_scalar(rn, mb_rru, size);
3695
3696 format!("fmov {}, {}", rd, rn)
3697 }
3698 &Inst::FpuRR { fpu_op, rd, rn } => {
3699 let (op, sizesrc, sizedest) = match fpu_op {
3700 FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),
3701 FPUOp1::Abs64 => ("fabs", ScalarSize::Size64, ScalarSize::Size64),
3702 FPUOp1::Neg32 => ("fneg", ScalarSize::Size32, ScalarSize::Size32),
3703 FPUOp1::Neg64 => ("fneg", ScalarSize::Size64, ScalarSize::Size64),
3704 FPUOp1::Sqrt32 => ("fsqrt", ScalarSize::Size32, ScalarSize::Size32),
3705 FPUOp1::Sqrt64 => ("fsqrt", ScalarSize::Size64, ScalarSize::Size64),
3706 FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64),
3707 FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32),
3708 };
3709 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
3710 let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
3711 format!("{} {}, {}", op, rd, rn)
3712 }
3713 &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
3714 let (op, size) = match fpu_op {
3715 FPUOp2::Add32 => ("fadd", ScalarSize::Size32),
3716 FPUOp2::Add64 => ("fadd", ScalarSize::Size64),
3717 FPUOp2::Sub32 => ("fsub", ScalarSize::Size32),
3718 FPUOp2::Sub64 => ("fsub", ScalarSize::Size64),
3719 FPUOp2::Mul32 => ("fmul", ScalarSize::Size32),
3720 FPUOp2::Mul64 => ("fmul", ScalarSize::Size64),
3721 FPUOp2::Div32 => ("fdiv", ScalarSize::Size32),
3722 FPUOp2::Div64 => ("fdiv", ScalarSize::Size64),
3723 FPUOp2::Max32 => ("fmax", ScalarSize::Size32),
3724 FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
3725 FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
3726 FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
3727 FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
3728 FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
3729 FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
3730 FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
3731 };
3732 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
3733 let rn = show_vreg_scalar(rn, mb_rru, size);
3734 let rm = show_vreg_scalar(rm, mb_rru, size);
3735 format!("{} {}, {}, {}", op, rd, rn, rm)
3736 }
3737 &Inst::FpuRRI { fpu_op, rd, rn } => {
3738 let (op, imm, vector) = match fpu_op {
3739 FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true),
3740 FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false),
3741 FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true),
3742 FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false),
3743 };
3744
3745 let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
3746 |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
3747 } else {
3748 |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
3749 };
3750 let rd = show_vreg_fn(rd.to_reg(), mb_rru);
3751 let rn = show_vreg_fn(rn, mb_rru);
3752 format!("{} {}, {}, {}", op, rd, rn, imm)
3753 }
3754 &Inst::FpuRRRR {
3755 fpu_op,
3756 rd,
3757 rn,
3758 rm,
3759 ra,
3760 } => {
3761 let (op, size) = match fpu_op {
3762 FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32),
3763 FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64),
3764 };
3765 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
3766 let rn = show_vreg_scalar(rn, mb_rru, size);
3767 let rm = show_vreg_scalar(rm, mb_rru, size);
3768 let ra = show_vreg_scalar(ra, mb_rru, size);
3769 format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
3770 }
3771 &Inst::FpuCmp32 { rn, rm } => {
3772 let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
3773 let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
3774 format!("fcmp {}, {}", rn, rm)
3775 }
3776 &Inst::FpuCmp64 { rn, rm } => {
3777 let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
3778 let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
3779 format!("fcmp {}, {}", rn, rm)
3780 }
3781 &Inst::FpuLoad32 { rd, ref mem, .. } => {
3782 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
3783 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3784 let mem = mem.show_rru(mb_rru);
3785 format!("{}ldr {}, {}", mem_str, rd, mem)
3786 }
3787 &Inst::FpuLoad64 { rd, ref mem, .. } => {
3788 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
3789 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3790 let mem = mem.show_rru(mb_rru);
3791 format!("{}ldr {}, {}", mem_str, rd, mem)
3792 }
3793 &Inst::FpuLoad128 { rd, ref mem, .. } => {
3794 let rd = rd.to_reg().show_rru(mb_rru);
3795 let rd = "q".to_string() + &rd[1..];
3796 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3797 let mem = mem.show_rru(mb_rru);
3798 format!("{}ldr {}, {}", mem_str, rd, mem)
3799 }
3800 &Inst::FpuStore32 { rd, ref mem, .. } => {
3801 let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32);
3802 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3803 let mem = mem.show_rru(mb_rru);
3804 format!("{}str {}, {}", mem_str, rd, mem)
3805 }
3806 &Inst::FpuStore64 { rd, ref mem, .. } => {
3807 let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64);
3808 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3809 let mem = mem.show_rru(mb_rru);
3810 format!("{}str {}, {}", mem_str, rd, mem)
3811 }
3812 &Inst::FpuStore128 { rd, ref mem, .. } => {
3813 let rd = rd.show_rru(mb_rru);
3814 let rd = "q".to_string() + &rd[1..];
3815 let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
3816 let mem = mem.show_rru(mb_rru);
3817 format!("{}str {}, {}", mem_str, rd, mem)
3818 }
3819 &Inst::FpuLoadP64 {
3820 rt, rt2, ref mem, ..
3821 } => {
3822 let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64);
3823 let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64);
3824 let mem = mem.show_rru(mb_rru);
3825
3826 format!("ldp {}, {}, {}", rt, rt2, mem)
3827 }
3828 &Inst::FpuStoreP64 {
3829 rt, rt2, ref mem, ..
3830 } => {
3831 let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64);
3832 let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64);
3833 let mem = mem.show_rru(mb_rru);
3834
3835 format!("stp {}, {}, {}", rt, rt2, mem)
3836 }
3837 &Inst::FpuLoadP128 {
3838 rt, rt2, ref mem, ..
3839 } => {
3840 let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128);
3841 let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128);
3842 let mem = mem.show_rru(mb_rru);
3843
3844 format!("ldp {}, {}, {}", rt, rt2, mem)
3845 }
3846 &Inst::FpuStoreP128 {
3847 rt, rt2, ref mem, ..
3848 } => {
3849 let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128);
3850 let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128);
3851 let mem = mem.show_rru(mb_rru);
3852
3853 format!("stp {}, {}, {}", rt, rt2, mem)
3854 }
3855 &Inst::LoadFpuConst64 { rd, const_data } => {
3856 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
3857 format!(
3858 "ldr {}, pc+8 ; b 12 ; data.f64 {}",
3859 rd,
3860 f64::from_bits(const_data)
3861 )
3862 }
3863 &Inst::LoadFpuConst128 { rd, const_data } => {
3864 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
3865 format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data)
3866 }
3867 &Inst::FpuToInt { op, rd, rn } => {
3868 let (op, sizesrc, sizedest) = match op {
3869 FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
3870 FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
3871 FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
3872 FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
3873 FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
3874 FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
3875 FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
3876 FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
3877 };
3878 let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest);
3879 let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
3880 format!("{} {}, {}", op, rd, rn)
3881 }
3882 &Inst::IntToFpu { op, rd, rn } => {
3883 let (op, sizesrc, sizedest) = match op {
3884 IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
3885 IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
3886 IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
3887 IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
3888 IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
3889 IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
3890 IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
3891 IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
3892 };
3893 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
3894 let rn = show_ireg_sized(rn, mb_rru, sizesrc);
3895 format!("{} {}, {}", op, rd, rn)
3896 }
3897 &Inst::FpuCSel32 { rd, rn, rm, cond } => {
3898 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
3899 let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
3900 let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
3901 let cond = cond.show_rru(mb_rru);
3902 format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
3903 }
3904 &Inst::FpuCSel64 { rd, rn, rm, cond } => {
3905 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
3906 let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
3907 let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
3908 let cond = cond.show_rru(mb_rru);
3909 format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
3910 }
3911 &Inst::FpuRound { op, rd, rn } => {
3912 let (inst, size) = match op {
3913 FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
3914 FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
3915 FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
3916 FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
3917 FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
3918 FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
3919 FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
3920 FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
3921 };
3922 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
3923 let rn = show_vreg_scalar(rn, mb_rru, size);
3924 format!("{} {}, {}", inst, rd, rn)
3925 }
3926 &Inst::MovToFpu { rd, rn, size } => {
3927 let operand_size = size.operand_size();
3928 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
3929 let rn = show_ireg_sized(rn, mb_rru, operand_size);
3930 format!("fmov {}, {}", rd, rn)
3931 }
3932 &Inst::MovToVec { rd, rn, idx, size } => {
3933 let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
3934 let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
3935 format!("mov {}, {}", rd, rn)
3936 }
3937 &Inst::MovFromVec { rd, rn, idx, size } => {
3938 let op = match size {
3939 VectorSize::Size8x16 => "umov",
3940 VectorSize::Size16x8 => "umov",
3941 VectorSize::Size32x4 => "mov",
3942 VectorSize::Size64x2 => "mov",
3943 _ => unimplemented!(),
3944 };
3945 let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
3946 let rn = show_vreg_element(rn, mb_rru, idx, size);
3947 format!("{} {}, {}", op, rd, rn)
3948 }
3949 &Inst::MovFromVecSigned {
3950 rd,
3951 rn,
3952 idx,
3953 size,
3954 scalar_size,
3955 } => {
3956 let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size);
3957 let rn = show_vreg_element(rn, mb_rru, idx, size);
3958 format!("smov {}, {}", rd, rn)
3959 }
3960 &Inst::VecDup { rd, rn, size } => {
3961 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
3962 let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
3963 format!("dup {}, {}", rd, rn)
3964 }
3965 &Inst::VecDupFromFpu { rd, rn, size } => {
3966 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
3967 let rn = show_vreg_element(rn, mb_rru, 0, size);
3968 format!("dup {}, {}", rd, rn)
3969 }
3970 &Inst::VecDupFPImm { rd, imm, size } => {
3971 let imm = imm.show_rru(mb_rru);
3972 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
3973
3974 format!("fmov {}, {}", rd, imm)
3975 }
3976 &Inst::VecDupImm {
3977 rd,
3978 imm,
3979 invert,
3980 size,
3981 } => {
3982 let imm = imm.show_rru(mb_rru);
3983 let op = if invert { "mvni" } else { "movi" };
3984 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
3985
3986 format!("{} {}, {}", op, rd, imm)
3987 }
3988 &Inst::VecExtend {
3989 t,
3990 rd,
3991 rn,
3992 high_half,
3993 } => {
3994 let (op, dest, src) = match (t, high_half) {
3995 (VecExtendOp::Sxtl8, false) => {
3996 ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8)
3997 }
3998 (VecExtendOp::Sxtl8, true) => {
3999 ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
4000 }
4001 (VecExtendOp::Sxtl16, false) => {
4002 ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4)
4003 }
4004 (VecExtendOp::Sxtl16, true) => {
4005 ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
4006 }
4007 (VecExtendOp::Sxtl32, false) => {
4008 ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2)
4009 }
4010 (VecExtendOp::Sxtl32, true) => {
4011 ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
4012 }
4013 (VecExtendOp::Uxtl8, false) => {
4014 ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8)
4015 }
4016 (VecExtendOp::Uxtl8, true) => {
4017 ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
4018 }
4019 (VecExtendOp::Uxtl16, false) => {
4020 ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4)
4021 }
4022 (VecExtendOp::Uxtl16, true) => {
4023 ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
4024 }
4025 (VecExtendOp::Uxtl32, false) => {
4026 ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2)
4027 }
4028 (VecExtendOp::Uxtl32, true) => {
4029 ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
4030 }
4031 };
4032 let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
4033 let rn = show_vreg_vector(rn, mb_rru, src);
4034 format!("{} {}, {}", op, rd, rn)
4035 }
4036 &Inst::VecMovElement {
4037 rd,
4038 rn,
4039 dest_idx,
4040 src_idx,
4041 size,
4042 } => {
4043 let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size);
4044 let rn = show_vreg_element(rn, mb_rru, src_idx, size);
4045 format!("mov {}, {}", rd, rn)
4046 }
4047 &Inst::VecRRLong {
4048 op,
4049 rd,
4050 rn,
4051 high_half,
4052 } => {
4053 let (op, rd_size, size, suffix) = match (op, high_half) {
4054 (VecRRLongOp::Fcvtl16, false) => {
4055 ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
4056 }
4057 (VecRRLongOp::Fcvtl16, true) => {
4058 ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
4059 }
4060 (VecRRLongOp::Fcvtl32, false) => {
4061 ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
4062 }
4063 (VecRRLongOp::Fcvtl32, true) => {
4064 ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
4065 }
4066 (VecRRLongOp::Shll8, false) => {
4067 ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
4068 }
4069 (VecRRLongOp::Shll8, true) => {
4070 ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
4071 }
4072 (VecRRLongOp::Shll16, false) => {
4073 ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
4074 }
4075 (VecRRLongOp::Shll16, true) => {
4076 ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
4077 }
4078 (VecRRLongOp::Shll32, false) => {
4079 ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
4080 }
4081 (VecRRLongOp::Shll32, true) => {
4082 ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
4083 }
4084 };
4085 let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
4086 let rn = show_vreg_vector(rn, mb_rru, size);
4087
4088 format!("{} {}, {}{}", op, rd, rn, suffix)
4089 }
4090 &Inst::VecRRNarrow {
4091 op,
4092 rd,
4093 rn,
4094 high_half,
4095 } => {
4096 let (op, rd_size, size) = match (op, high_half) {
4097 (VecRRNarrowOp::Xtn16, false) => {
4098 ("xtn", VectorSize::Size8x8, VectorSize::Size16x8)
4099 }
4100 (VecRRNarrowOp::Xtn16, true) => {
4101 ("xtn2", VectorSize::Size8x16, VectorSize::Size16x8)
4102 }
4103 (VecRRNarrowOp::Xtn32, false) => {
4104 ("xtn", VectorSize::Size16x4, VectorSize::Size32x4)
4105 }
4106 (VecRRNarrowOp::Xtn32, true) => {
4107 ("xtn2", VectorSize::Size16x8, VectorSize::Size32x4)
4108 }
4109 (VecRRNarrowOp::Xtn64, false) => {
4110 ("xtn", VectorSize::Size32x2, VectorSize::Size64x2)
4111 }
4112 (VecRRNarrowOp::Xtn64, true) => {
4113 ("xtn2", VectorSize::Size32x4, VectorSize::Size64x2)
4114 }
4115 (VecRRNarrowOp::Sqxtn16, false) => {
4116 ("sqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
4117 }
4118 (VecRRNarrowOp::Sqxtn16, true) => {
4119 ("sqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
4120 }
4121 (VecRRNarrowOp::Sqxtn32, false) => {
4122 ("sqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
4123 }
4124 (VecRRNarrowOp::Sqxtn32, true) => {
4125 ("sqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
4126 }
4127 (VecRRNarrowOp::Sqxtn64, false) => {
4128 ("sqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
4129 }
4130 (VecRRNarrowOp::Sqxtn64, true) => {
4131 ("sqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
4132 }
4133 (VecRRNarrowOp::Sqxtun16, false) => {
4134 ("sqxtun", VectorSize::Size8x8, VectorSize::Size16x8)
4135 }
4136 (VecRRNarrowOp::Sqxtun16, true) => {
4137 ("sqxtun2", VectorSize::Size8x16, VectorSize::Size16x8)
4138 }
4139 (VecRRNarrowOp::Sqxtun32, false) => {
4140 ("sqxtun", VectorSize::Size16x4, VectorSize::Size32x4)
4141 }
4142 (VecRRNarrowOp::Sqxtun32, true) => {
4143 ("sqxtun2", VectorSize::Size16x8, VectorSize::Size32x4)
4144 }
4145 (VecRRNarrowOp::Sqxtun64, false) => {
4146 ("sqxtun", VectorSize::Size32x2, VectorSize::Size64x2)
4147 }
4148 (VecRRNarrowOp::Sqxtun64, true) => {
4149 ("sqxtun2", VectorSize::Size32x4, VectorSize::Size64x2)
4150 }
4151 (VecRRNarrowOp::Uqxtn16, false) => {
4152 ("uqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
4153 }
4154 (VecRRNarrowOp::Uqxtn16, true) => {
4155 ("uqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
4156 }
4157 (VecRRNarrowOp::Uqxtn32, false) => {
4158 ("uqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
4159 }
4160 (VecRRNarrowOp::Uqxtn32, true) => {
4161 ("uqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
4162 }
4163 (VecRRNarrowOp::Uqxtn64, false) => {
4164 ("uqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
4165 }
4166 (VecRRNarrowOp::Uqxtn64, true) => {
4167 ("uqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
4168 }
4169 (VecRRNarrowOp::Fcvtn32, false) => {
4170 ("fcvtn", VectorSize::Size16x4, VectorSize::Size32x4)
4171 }
4172 (VecRRNarrowOp::Fcvtn32, true) => {
4173 ("fcvtn2", VectorSize::Size16x8, VectorSize::Size32x4)
4174 }
4175 (VecRRNarrowOp::Fcvtn64, false) => {
4176 ("fcvtn", VectorSize::Size32x2, VectorSize::Size64x2)
4177 }
4178 (VecRRNarrowOp::Fcvtn64, true) => {
4179 ("fcvtn2", VectorSize::Size32x4, VectorSize::Size64x2)
4180 }
4181 };
4182 let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
4183 let rn = show_vreg_vector(rn, mb_rru, size);
4184
4185 format!("{} {}, {}", op, rd, rn)
4186 }
4187 &Inst::VecRRPair { op, rd, rn } => {
4188 let op = match op {
4189 VecPairOp::Addp => "addp",
4190 };
4191 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
4192 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2);
4193
4194 format!("{} {}, {}", op, rd, rn)
4195 }
4196 &Inst::VecRRPairLong { op, rd, rn } => {
4197 let (op, dest, src) = match op {
4198 VecRRPairLongOp::Saddlp8 => {
4199 ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
4200 }
4201 VecRRPairLongOp::Saddlp16 => {
4202 ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
4203 }
4204 VecRRPairLongOp::Uaddlp8 => {
4205 ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
4206 }
4207 VecRRPairLongOp::Uaddlp16 => {
4208 ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
4209 }
4210 };
4211 let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
4212 let rn = show_vreg_vector(rn, mb_rru, src);
4213
4214 format!("{} {}, {}", op, rd, rn)
4215 }
4216 &Inst::VecRRR {
4217 rd,
4218 rn,
4219 rm,
4220 alu_op,
4221 size,
4222 } => {
4223 let (op, size) = match alu_op {
4224 VecALUOp::Sqadd => ("sqadd", size),
4225 VecALUOp::Uqadd => ("uqadd", size),
4226 VecALUOp::Sqsub => ("sqsub", size),
4227 VecALUOp::Uqsub => ("uqsub", size),
4228 VecALUOp::Cmeq => ("cmeq", size),
4229 VecALUOp::Cmge => ("cmge", size),
4230 VecALUOp::Cmgt => ("cmgt", size),
4231 VecALUOp::Cmhs => ("cmhs", size),
4232 VecALUOp::Cmhi => ("cmhi", size),
4233 VecALUOp::Fcmeq => ("fcmeq", size),
4234 VecALUOp::Fcmgt => ("fcmgt", size),
4235 VecALUOp::Fcmge => ("fcmge", size),
4236 VecALUOp::And => ("and", VectorSize::Size8x16),
4237 VecALUOp::Bic => ("bic", VectorSize::Size8x16),
4238 VecALUOp::Orr => ("orr", VectorSize::Size8x16),
4239 VecALUOp::Eor => ("eor", VectorSize::Size8x16),
4240 VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
4241 VecALUOp::Umaxp => ("umaxp", size),
4242 VecALUOp::Add => ("add", size),
4243 VecALUOp::Sub => ("sub", size),
4244 VecALUOp::Mul => ("mul", size),
4245 VecALUOp::Sshl => ("sshl", size),
4246 VecALUOp::Ushl => ("ushl", size),
4247 VecALUOp::Umin => ("umin", size),
4248 VecALUOp::Smin => ("smin", size),
4249 VecALUOp::Umax => ("umax", size),
4250 VecALUOp::Smax => ("smax", size),
4251 VecALUOp::Urhadd => ("urhadd", size),
4252 VecALUOp::Fadd => ("fadd", size),
4253 VecALUOp::Fsub => ("fsub", size),
4254 VecALUOp::Fdiv => ("fdiv", size),
4255 VecALUOp::Fmax => ("fmax", size),
4256 VecALUOp::Fmin => ("fmin", size),
4257 VecALUOp::Fmul => ("fmul", size),
4258 VecALUOp::Addp => ("addp", size),
4259 VecALUOp::Zip1 => ("zip1", size),
4260 VecALUOp::Sqrdmulh => ("sqrdmulh", size),
4261 };
4262 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4263 let rn = show_vreg_vector(rn, mb_rru, size);
4264 let rm = show_vreg_vector(rm, mb_rru, size);
4265 format!("{} {}, {}, {}", op, rd, rn, rm)
4266 }
4267 &Inst::VecRRRLong {
4268 rd,
4269 rn,
4270 rm,
4271 alu_op,
4272 high_half,
4273 } => {
4274 let (op, dest_size, src_size) = match (alu_op, high_half) {
4275 (VecRRRLongOp::Smull8, false) => {
4276 ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
4277 }
4278 (VecRRRLongOp::Smull8, true) => {
4279 ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
4280 }
4281 (VecRRRLongOp::Smull16, false) => {
4282 ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
4283 }
4284 (VecRRRLongOp::Smull16, true) => {
4285 ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
4286 }
4287 (VecRRRLongOp::Smull32, false) => {
4288 ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
4289 }
4290 (VecRRRLongOp::Smull32, true) => {
4291 ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
4292 }
4293 (VecRRRLongOp::Umull8, false) => {
4294 ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
4295 }
4296 (VecRRRLongOp::Umull8, true) => {
4297 ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
4298 }
4299 (VecRRRLongOp::Umull16, false) => {
4300 ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
4301 }
4302 (VecRRRLongOp::Umull16, true) => {
4303 ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
4304 }
4305 (VecRRRLongOp::Umull32, false) => {
4306 ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
4307 }
4308 (VecRRRLongOp::Umull32, true) => {
4309 ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
4310 }
4311 (VecRRRLongOp::Umlal8, false) => {
4312 ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
4313 }
4314 (VecRRRLongOp::Umlal8, true) => {
4315 ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
4316 }
4317 (VecRRRLongOp::Umlal16, false) => {
4318 ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
4319 }
4320 (VecRRRLongOp::Umlal16, true) => {
4321 ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
4322 }
4323 (VecRRRLongOp::Umlal32, false) => {
4324 ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
4325 }
4326 (VecRRRLongOp::Umlal32, true) => {
4327 ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
4328 }
4329 };
4330 let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
4331 let rn = show_vreg_vector(rn, mb_rru, src_size);
4332 let rm = show_vreg_vector(rm, mb_rru, src_size);
4333 format!("{} {}, {}, {}", op, rd, rn, rm)
4334 }
4335 &Inst::VecMisc { op, rd, rn, size } => {
4336 let (op, size, suffix) = match op {
4337 VecMisc2::Not => (
4338 "mvn",
4339 if size.is_128bits() {
4340 VectorSize::Size8x16
4341 } else {
4342 VectorSize::Size8x8
4343 },
4344 "",
4345 ),
4346 VecMisc2::Neg => ("neg", size, ""),
4347 VecMisc2::Abs => ("abs", size, ""),
4348 VecMisc2::Fabs => ("fabs", size, ""),
4349 VecMisc2::Fneg => ("fneg", size, ""),
4350 VecMisc2::Fsqrt => ("fsqrt", size, ""),
4351 VecMisc2::Rev64 => ("rev64", size, ""),
4352 VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
4353 VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
4354 VecMisc2::Scvtf => ("scvtf", size, ""),
4355 VecMisc2::Ucvtf => ("ucvtf", size, ""),
4356 VecMisc2::Frintn => ("frintn", size, ""),
4357 VecMisc2::Frintz => ("frintz", size, ""),
4358 VecMisc2::Frintm => ("frintm", size, ""),
4359 VecMisc2::Frintp => ("frintp", size, ""),
4360 VecMisc2::Cnt => ("cnt", size, ""),
4361 VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
4362 };
4363 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4364 let rn = show_vreg_vector(rn, mb_rru, size);
4365 format!("{} {}, {}{}", op, rd, rn, suffix)
4366 }
4367 &Inst::VecLanes { op, rd, rn, size } => {
4368 let op = match op {
4369 VecLanesOp::Uminv => "uminv",
4370 VecLanesOp::Addv => "addv",
4371 };
4372 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
4373 let rn = show_vreg_vector(rn, mb_rru, size);
4374 format!("{} {}, {}", op, rd, rn)
4375 }
4376 &Inst::VecShiftImm {
4377 op,
4378 rd,
4379 rn,
4380 size,
4381 imm,
4382 } => {
4383 let op = match op {
4384 VecShiftImmOp::Shl => "shl",
4385 VecShiftImmOp::Ushr => "ushr",
4386 VecShiftImmOp::Sshr => "sshr",
4387 };
4388 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4389 let rn = show_vreg_vector(rn, mb_rru, size);
4390 format!("{} {}, {}, #{}", op, rd, rn, imm)
4391 }
4392 &Inst::VecExtract { rd, rn, rm, imm4 } => {
4393 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4394 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4395 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4396 format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
4397 }
4398 &Inst::VecTbl {
4399 rd,
4400 rn,
4401 rm,
4402 is_extension,
4403 } => {
4404 let op = if is_extension { "tbx" } else { "tbl" };
4405 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4406 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4407 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4408 format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
4409 }
4410 &Inst::VecTbl2 {
4411 rd,
4412 rn,
4413 rn2,
4414 rm,
4415 is_extension,
4416 } => {
4417 let op = if is_extension { "tbx" } else { "tbl" };
4418 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4419 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4420 let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16);
4421 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4422 format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
4423 }
4424 &Inst::VecLoadReplicate { rd, rn, size, .. } => {
4425 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
4426 let rn = rn.show_rru(mb_rru);
4427
4428 format!("ld1r {{ {} }}, [{}]", rd, rn)
4429 }
4430 &Inst::VecCSel { rd, rn, rm, cond } => {
4431 let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
4432 let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
4433 let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
4434 let cond = cond.show_rru(mb_rru);
4435 format!(
4436 "vcsel {}, {}, {}, {} (if-then-else diamond)",
4437 rd, rn, rm, cond
4438 )
4439 }
4440 &Inst::MovToNZCV { rn } => {
4441 let rn = rn.show_rru(mb_rru);
4442 format!("msr nzcv, {}", rn)
4443 }
4444 &Inst::MovFromNZCV { rd } => {
4445 let rd = rd.to_reg().show_rru(mb_rru);
4446 format!("mrs {}, nzcv", rd)
4447 }
4448 &Inst::Extend {
4449 rd,
4450 rn,
4451 signed: false,
4452 from_bits: 1,
4453 ..
4454 } => {
4455 let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
4456 let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
4457 format!("and {}, {}, #1", rd, rn)
4458 }
4459 &Inst::Extend {
4460 rd,
4461 rn,
4462 signed: false,
4463 from_bits: 32,
4464 to_bits: 64,
4465 } => {
4466 // The case of a zero extension from 32 to 64 bits, is implemented
4467 // with a "mov" to a 32-bit (W-reg) dest, because this zeroes
4468 // the top 32 bits.
4469 let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
4470 let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
4471 format!("mov {}, {}", rd, rn)
4472 }
4473 &Inst::Extend {
4474 rd,
4475 rn,
4476 signed,
4477 from_bits,
4478 to_bits,
4479 } => {
4480 assert!(from_bits <= to_bits);
4481 let op = match (signed, from_bits) {
4482 (false, 8) => "uxtb",
4483 (true, 8) => "sxtb",
4484 (false, 16) => "uxth",
4485 (true, 16) => "sxth",
4486 (true, 32) => "sxtw",
4487 (true, _) => "sbfx",
4488 (false, _) => "ubfx",
4489 };
4490 if op == "sbfx" || op == "ubfx" {
4491 let dest_size = OperandSize::from_bits(to_bits);
4492 let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
4493 let rn = show_ireg_sized(rn, mb_rru, dest_size);
4494 format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits)
4495 } else {
4496 let dest_size = if signed {
4497 OperandSize::from_bits(to_bits)
4498 } else {
4499 OperandSize::Size32
4500 };
4501 let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
4502 let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
4503 format!("{} {}, {}", op, rd, rn)
4504 }
4505 }
4506 &Inst::Call { .. } => format!("bl 0"),
4507 &Inst::CallInd { ref info, .. } => {
4508 let rn = info.rn.show_rru(mb_rru);
4509 format!("blr {}", rn)
4510 }
4511 &Inst::Ret => "ret".to_string(),
4512 &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
4513 &Inst::Jump { ref dest } => {
4514 let dest = dest.show_rru(mb_rru);
4515 format!("b {}", dest)
4516 }
4517 &Inst::CondBr {
4518 ref taken,
4519 ref not_taken,
4520 ref kind,
4521 } => {
4522 let taken = taken.show_rru(mb_rru);
4523 let not_taken = not_taken.show_rru(mb_rru);
4524 match kind {
4525 &CondBrKind::Zero(reg) => {
4526 let reg = reg.show_rru(mb_rru);
4527 format!("cbz {}, {} ; b {}", reg, taken, not_taken)
4528 }
4529 &CondBrKind::NotZero(reg) => {
4530 let reg = reg.show_rru(mb_rru);
4531 format!("cbnz {}, {} ; b {}", reg, taken, not_taken)
4532 }
4533 &CondBrKind::Cond(c) => {
4534 let c = c.show_rru(mb_rru);
4535 format!("b.{} {} ; b {}", c, taken, not_taken)
4536 }
4537 }
4538 }
4539 &Inst::IndirectBr { rn, .. } => {
4540 let rn = rn.show_rru(mb_rru);
4541 format!("br {}", rn)
4542 }
4543 &Inst::Brk => "brk #0".to_string(),
4544 &Inst::Udf { .. } => "udf".to_string(),
4545 &Inst::TrapIf { ref kind, .. } => match kind {
4546 &CondBrKind::Zero(reg) => {
4547 let reg = reg.show_rru(mb_rru);
4548 format!("cbnz {}, 8 ; udf", reg)
4549 }
4550 &CondBrKind::NotZero(reg) => {
4551 let reg = reg.show_rru(mb_rru);
4552 format!("cbz {}, 8 ; udf", reg)
4553 }
4554 &CondBrKind::Cond(c) => {
4555 let c = c.invert().show_rru(mb_rru);
4556 format!("b.{} 8 ; udf", c)
4557 }
4558 },
4559 &Inst::Adr { rd, off } => {
4560 let rd = rd.show_rru(mb_rru);
4561 format!("adr {}, pc+{}", rd, off)
4562 }
4563 &Inst::Word4 { data } => format!("data.i32 {}", data),
4564 &Inst::Word8 { data } => format!("data.i64 {}", data),
4565 &Inst::JTSequence {
4566 ref info,
4567 ridx,
4568 rtmp1,
4569 rtmp2,
4570 ..
4571 } => {
4572 let ridx = ridx.show_rru(mb_rru);
4573 let rtmp1 = rtmp1.show_rru(mb_rru);
4574 let rtmp2 = rtmp2.show_rru(mb_rru);
4575 let default_target = info.default_target.show_rru(mb_rru);
4576 format!(
4577 concat!(
4578 "b.hs {} ; ",
4579 "adr {}, pc+16 ; ",
4580 "ldrsw {}, [{}, {}, LSL 2] ; ",
4581 "add {}, {}, {} ; ",
4582 "br {} ; ",
4583 "jt_entries {:?}"
4584 ),
4585 default_target,
4586 rtmp1,
4587 rtmp2,
4588 rtmp1,
4589 ridx,
4590 rtmp1,
4591 rtmp1,
4592 rtmp2,
4593 rtmp1,
4594 info.targets
4595 )
4596 }
4597 &Inst::LoadExtName {
4598 rd,
4599 ref name,
4600 offset,
4601 } => {
4602 let rd = rd.show_rru(mb_rru);
4603 format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset)
4604 }
4605 &Inst::LoadAddr { rd, ref mem } => {
4606 // TODO: we really should find a better way to avoid duplication of
4607 // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
4608 // expansion stage (i.e., legalization, but without the slow edit-in-place
4609 // of the existing legalization framework).
4610 let (mem_insts, mem) = mem_finalize(0, mem, state);
4611 let mut ret = String::new();
4612 for inst in mem_insts.into_iter() {
4613 ret.push_str(&inst.show_rru(mb_rru));
4614 }
4615 let (reg, index_reg, offset) = match mem {
4616 AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
4617 AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
4618 AMode::UnsignedOffset(r, uimm12scaled) => {
4619 (r, None, uimm12scaled.value() as i32)
4620 }
4621 _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
4622 };
4623 let abs_offset = if offset < 0 {
4624 -offset as u64
4625 } else {
4626 offset as u64
4627 };
4628 let alu_op = if offset < 0 {
4629 ALUOp::Sub64
4630 } else {
4631 ALUOp::Add64
4632 };
4633
4634 if let Some((idx, extendop)) = index_reg {
4635 let add = Inst::AluRRRExtend {
4636 alu_op: ALUOp::Add64,
4637 rd,
4638 rn: reg,
4639 rm: idx,
4640 extendop,
4641 };
4642
4643 ret.push_str(&add.show_rru(mb_rru));
4644 } else if offset == 0 {
4645 let mov = Inst::gen_move(rd, reg, I64);
4646 ret.push_str(&mov.show_rru(mb_rru));
4647 } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
4648 let add = Inst::AluRRImm12 {
4649 alu_op,
4650 rd,
4651 rn: reg,
4652 imm12,
4653 };
4654 ret.push_str(&add.show_rru(mb_rru));
4655 } else {
4656 let tmp = writable_spilltmp_reg();
4657 for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
4658 ret.push_str(&inst.show_rru(mb_rru));
4659 }
4660 let add = Inst::AluRRR {
4661 alu_op,
4662 rd,
4663 rn: reg,
4664 rm: tmp.to_reg(),
4665 };
4666 ret.push_str(&add.show_rru(mb_rru));
4667 }
4668 ret
4669 }
4670 &Inst::VirtualSPOffsetAdj { offset } => {
4671 state.virtual_sp_offset += offset;
4672 format!("virtual_sp_offset_adjust {}", offset)
4673 }
4674 &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
4675
4676 &Inst::ElfTlsGetAddr { ref symbol } => {
4677 format!("elf_tls_get_addr {}", symbol)
4678 }
4679
4680 &Inst::ValueLabelMarker { label, reg } => {
4681 format!("value_label {:?}, {}", label, reg.show_rru(mb_rru))
4682 }
4683
4684 &Inst::Unwind { ref inst } => {
4685 format!("unwind {:?}", inst)
4686 }
4687 }
4688 }
4689 }
4690
4691 //=============================================================================
4692 // Label fixups and jump veneers.
4693
/// Different forms of label references for different instruction formats.
///
/// Each variant names the instruction field that receives the PC-relative
/// offset; the ranges implied here must agree with `max_pos_range` /
/// `max_neg_range` and the masks in `patch` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond. Total reach: +/- 1 MiB.
    Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl. Total reach: +/- 128 MiB.
    Branch26,
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5. Total reach: +/- 1 MiB.
    Ldr19,
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    /// Total reach: +/- 1 MiB.
    Adr21,
    /// 32-bit PC relative constant offset (from address of constant itself),
    /// signed. Used in jump tables: the 32-bit data word itself is patched,
    /// not an instruction field.
    PCRel32,
}
4713
impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
    const ALIGN: CodeOffset = 4;

    /// Maximum PC-relative range (positive), inclusive.
    fn max_pos_range(self) -> CodeOffset {
        match self {
            // 19-bit immediate, left-shifted by 2, for 21 bits of total range. Signed, so +2^20
            // from zero. Likewise for two other shifted cases below.
            LabelUse::Branch19 => (1 << 20) - 1,
            // 26-bit immediate, left-shifted by 2, for 28 bits of total range; signed, so +2^27.
            LabelUse::Branch26 => (1 << 27) - 1,
            LabelUse::Ldr19 => (1 << 20) - 1,
            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
            // range.
            LabelUse::Adr21 => (1 << 20) - 1,
            // Full signed 32-bit word: +2^31 - 1.
            LabelUse::PCRel32 => 0x7fffffff,
        }
    }

    /// Maximum PC-relative range (negative).
    fn max_neg_range(self) -> CodeOffset {
        // All forms are twos-complement signed offsets, so negative limit is one more than
        // positive limit.
        self.max_pos_range() + 1
    }

    /// Size of window into code needed to do the patch.
    fn patch_size(self) -> CodeOffset {
        // Patch is on one instruction only for all of these label reference types.
        4
    }

    /// Perform the patch: rewrite the 4-byte little-endian word at `buffer` so that its
    /// offset field refers to `label_offset`, given that the word itself sits at
    /// `use_offset`. Caller must have validated the range (asserted below in debug builds).
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        // Truncation to u32 keeps the two's-complement bit pattern; the masks below
        // select only the field bits, so the sign is preserved within the field width.
        let pc_rel = pc_rel as u32;
        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
        // Which bits of the instruction word hold the offset field for each form.
        let mask = match self {
            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 23..5 inclusive
            LabelUse::PCRel32 => 0xffffffff,
        };
        // Branch/load-literal forms encode the offset divided by 4; ADR and data words
        // encode it directly.
        let pc_rel_shifted = match self {
            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
            _ => {
                debug_assert!(pc_rel & 3 == 0);
                pc_rel >> 2
            }
        };
        // Position the (possibly shifted) offset within the instruction word.
        // NOTE(review): for Adr21 this places the low 19 bits of the offset in bits 23:5
        // and the high 2 bits in bits 30:29, whereas the ARM ADR encoding (and the
        // `LabelUse::Adr21` doc comment above) puts the LOW 2 bits in 30:29 (immlo) and
        // bits 20:2 in 23:5 (immhi) -- TODO confirm against the emission path for Adr.
        let pc_rel_inserted = match self {
            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
            LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10,
            LabelUse::PCRel32 => pc_rel_shifted,
        };
        // PCRel32 data words are patched by *adding* the offset (the existing word acts
        // as an addend); instruction forms are patched by bitfield insertion.
        let is_add = match self {
            LabelUse::PCRel32 => true,
            _ => false,
        };
        let insn_word = if is_add {
            insn_word.wrapping_add(pc_rel_inserted)
        } else {
            (insn_word & !mask) | pc_rel_inserted
        };
        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
    }

    /// Is a veneer supported for this label reference type?
    fn supports_veneer(self) -> bool {
        match self {
            // A conditional branch that is out of range can bounce through an
            // unconditional branch (Branch26), which has far greater reach.
            LabelUse::Branch19 => true, // veneer is a Branch26
            _ => false,
        }
    }

    /// How large is the veneer, if supported? One AArch64 instruction.
    fn veneer_size(self) -> CodeOffset {
        4
    }

    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
    /// an offset and label-use for the veneer's use of the original label.
    fn generate_veneer(
        self,
        buffer: &mut [u8],
        veneer_offset: CodeOffset,
    ) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::Branch19 => {
                // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
                // bother with constructing an Inst. Opcode 0b000101 in bits 31:26 is `b`,
                // with a zero (to-be-patched) offset field.
                let insn_word = 0b000101 << 26;
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
                (veneer_offset, LabelUse::Branch26)
            }
            _ => panic!("Unsupported label-reference type for veneer generation!"),
        }
    }
}
4817