; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

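; These tests check that a select between an operand and zero, feeding the
; first data operand of a predicated SVE intrinsic, is lowered to the zeroing
; pseudo form: a movprfx with /z predication followed by the predicated
; instruction, when +use-experimental-zeroing-pseudos is enabled.
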
;
; ADD
;

define <vscale x 16 x i8> @add_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: add_i8_zero:
; CHECK:      movprfx z0.b, p0/z, z0.b
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a_z,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @add_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: add_i16_zero:
; CHECK:      movprfx z0.h, p0/z, z0.h
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a_z,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @add_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: add_i32_zero:
; CHECK:      movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a_z,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @add_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: add_i64_zero:
; CHECK:      movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a_z,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SUB
;

define <vscale x 16 x i8> @sub_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sub_i8_zero:
; CHECK:      movprfx z0.b, p0/z, z0.b
; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a_z,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sub_i16_zero:
; CHECK:      movprfx z0.h, p0/z, z0.h
; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a_z,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_zero:
; CHECK:      movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a_z,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sub_i64_zero:
; CHECK:      movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a_z,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SUBR
;

define <vscale x 16 x i8> @subr_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: subr_i8_zero:
; CHECK:      movprfx z0.b, p0/z, z0.b
; CHECK-NEXT: subr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a_z,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @subr_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: subr_i16_zero:
; CHECK:      movprfx z0.h, p0/z, z0.h
; CHECK-NEXT: subr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a_z,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @subr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: subr_i32_zero:
; CHECK:      movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: subr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a_z,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @subr_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: subr_i64_zero:
; CHECK:      movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: subr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a_z,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)