1; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
2; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
3
4; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
5; WARN-NOT: warning
6
7;
8; Unpredicated dup instruction (which is an alias for mov):
9;   * register + register,
10;   * register + immediate
11;
12
; Broadcast a scalar GPR into every i8 lane of a scalable vector.
; Expects the register form of unpredicated dup (an alias of mov).
define <vscale x 16 x i8> @dup_i8(i8 %b) {
; CHECK-LABEL: dup_i8:
; CHECK: mov z0.b, w0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %b)
  ret <vscale x 16 x i8> %out
}
20
; Broadcast a constant into every i8 lane; the constant 16 fits the
; instruction's immediate field, so the immediate form is selected.
define <vscale x 16 x i8> @dup_imm_i8() {
; CHECK-LABEL: dup_imm_i8:
; CHECK: mov z0.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 16)
  ret <vscale x 16 x i8> %out
}
28
; Broadcast a scalar GPR into every i16 lane (register form of dup/mov).
define <vscale x 8 x i16> @dup_i16(i16 %b) {
; CHECK-LABEL: dup_i16:
; CHECK: mov z0.h, w0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %b)
  ret <vscale x 8 x i16> %out
}
36
; Broadcast the constant 16 into every i16 lane (immediate form of dup/mov).
; The scalar parameter was unused and inconsistent with dup_imm_i8, which
; takes none; it has been dropped.
define <vscale x 8 x i16> @dup_imm_i16() {
; CHECK-LABEL: dup_imm_i16:
; CHECK: mov z0.h, #16
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  ret <vscale x 8 x i16> %out
}
44
; Broadcast a scalar GPR into every i32 lane (register form of dup/mov).
define <vscale x 4 x i32> @dup_i32(i32 %b) {
; CHECK-LABEL: dup_i32:
; CHECK: mov z0.s, w0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
  ret <vscale x 4 x i32> %out
}
52
; Broadcast the constant 16 into every i32 lane (immediate form of dup/mov).
; The scalar parameter was unused and inconsistent with dup_imm_i8, which
; takes none; it has been dropped.
define <vscale x 4 x i32> @dup_imm_i32() {
; CHECK-LABEL: dup_imm_i32:
; CHECK: mov z0.s, #16
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 16)
  ret <vscale x 4 x i32> %out
}
60
; Broadcast a scalar GPR into every i64 lane; 64-bit elements read from
; the x-register rather than the w-register.
define <vscale x 2 x i64> @dup_i64(i64 %b) {
; CHECK-LABEL: dup_i64:
; CHECK: mov z0.d, x0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
  ret <vscale x 2 x i64> %out
}
68
; Broadcast the constant 16 into every i64 lane (immediate form of dup/mov).
; The scalar parameter was unused and inconsistent with dup_imm_i8, which
; takes none; it has been dropped.
define <vscale x 2 x i64> @dup_imm_i64() {
; CHECK-LABEL: dup_imm_i64:
; CHECK: mov z0.d, #16
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 16)
  ret <vscale x 2 x i64> %out
}
76
; Broadcast a scalar FP half into every lane; the source is an FP
; register (h0), not a GPR, so the SIMD&FP-register form is selected.
define <vscale x 8 x half> @dup_f16(half %b) {
; CHECK-LABEL: dup_f16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
  ret <vscale x 8 x half> %out
}
84
; Broadcast a scalar bfloat into every lane; codegen matches the f16 case
; (mov z0.h, h0). Attribute #0 adds +bf16, which this function requires.
define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
  ret <vscale x 8 x bfloat> %out
}
92
; Broadcast the FP constant 16.0 into every half lane (FP-immediate form
; of dup/mov). The scalar parameter was unused and inconsistent with
; dup_imm_i8, which takes none; it has been dropped.
define <vscale x 8 x half> @dup_imm_f16() {
; CHECK-LABEL: dup_imm_f16:
; CHECK: mov z0.h, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
  ret <vscale x 8 x half> %out
}
100
; Broadcast a scalar FP float into every lane (SIMD&FP-register form, s0).
define <vscale x 4 x float> @dup_f32(float %b) {
; CHECK-LABEL: dup_f32:
; CHECK: mov z0.s, s0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
  ret <vscale x 4 x float> %out
}
108
; Broadcast the FP constant 16.0 into every float lane (FP-immediate form
; of dup/mov). The scalar parameter was unused and inconsistent with
; dup_imm_i8, which takes none; it has been dropped.
define <vscale x 4 x float> @dup_imm_f32() {
; CHECK-LABEL: dup_imm_f32:
; CHECK: mov z0.s, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 16.)
  ret <vscale x 4 x float> %out
}
116
; Broadcast a scalar FP double into every lane (SIMD&FP-register form, d0).
define <vscale x 2 x double> @dup_f64(double %b) {
; CHECK-LABEL: dup_f64:
; CHECK: mov z0.d, d0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
  ret <vscale x 2 x double> %out
}
124
; Broadcast the FP constant 16.0 into every double lane (FP-immediate form
; of dup/mov). The scalar parameter was unused and inconsistent with
; dup_imm_i8, which takes none; it has been dropped.
define <vscale x 2 x double> @dup_imm_f64() {
; CHECK-LABEL: dup_imm_f64:
; CHECK: mov z0.d, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 16.)
  ret <vscale x 2 x double> %out
}
132
declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
134declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
135declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
136declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
137declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
138declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
139declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
140declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
141
142; +bf16 is required for the bfloat version.
143attributes #0 = { "target-features"="+sve,+bf16" }
144