1; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
2; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
3
4; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
5; WARN-NOT: warning
6
7;
8; ST1B
9;
10
define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_i8:
; CHECK: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  ; Full-width byte store; reg+reg addressing with no shift on the index.
  %ptr = getelementptr i8, i8* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %ptr)
  ret void
}
21
22
23
define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_h:
; CHECK: st1b { z0.h }, p0, [x0, x1]
; CHECK-NEXT: ret
  ; Truncating store: halfword lanes are narrowed to bytes before storing.
  %ptr = getelementptr i8, i8* %a, i64 %index
  %narrow = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
  call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> %narrow, <vscale x 8 x i1> %pred, i8* %ptr)
  ret void
}
35
define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_s:
; CHECK: st1b { z0.s }, p0, [x0, x1]
; CHECK-NEXT: ret
  ; Truncating store: word lanes are narrowed to bytes before storing.
  %ptr = getelementptr i8, i8* %a, i64 %index
  %narrow = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
  call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> %narrow, <vscale x 4 x i1> %pred, i8* %ptr)
  ret void
}
47
define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_d:
; CHECK: st1b { z0.d }, p0, [x0, x1]
; CHECK-NEXT: ret
  ; Truncating store: doubleword lanes are narrowed to bytes before storing.
  %ptr = getelementptr i8, i8* %a, i64 %index
  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
  call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> %narrow, <vscale x 2 x i1> %pred, i8* %ptr)
  ret void
}
59
60;
61; ST1H
62;
63
define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: st1h_i16:
; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  ; Full-width halfword store; the index is scaled by lsl #1.
  %ptr = getelementptr i16, i16* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %ptr)
  ret void
}
74
define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %a, i64 %index) {
; CHECK-LABEL: st1h_f16:
; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  ; Same encoding as the i16 case: st1h does not care about lane type.
  %ptr = getelementptr half, half* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %ptr)
  ret void
}
85
define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfloat* %a, i64 %index) #0 {
; CHECK-LABEL: st1h_bf16:
; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  ; bfloat variant; requires +bf16 (attribute #0 below).
  %ptr = getelementptr bfloat, bfloat* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfloat* %ptr)
  ret void
}
96
; Consistency fix: every other test in this file exercises reg+reg addressing
; via a GEP on an i64 %index; st1h_s was the lone test storing to the bare
; pointer and checking plain [x0]. Give it the same shape and expect the
; index scaled by lsl #1 for halfword elements.
define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: st1h_s:
; CHECK: st1h { z0.s }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  ; Truncating store: word lanes are narrowed to halfwords before storing.
  %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
  call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> %trunc,
                                          <vscale x 4 x i1> %pred,
                                          i16* %base)
  ret void
}
107
define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: st1h_d:
; CHECK: st1h { z0.d }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  ; Truncating store: doubleword lanes are narrowed to halfwords before storing.
  %ptr = getelementptr i16, i16* %a, i64 %index
  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
  call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> %narrow, <vscale x 2 x i1> %pred, i16* %ptr)
  ret void
}
119
120;
121; ST1W
122;
123
define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %a, i64 %index) {
; CHECK-LABEL: st1w_i32:
; CHECK: st1w { z0.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  ; Full-width word store; the index is scaled by lsl #2.
  %ptr = getelementptr i32, i32* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %ptr)
  ret void
}
134
define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %a, i64 %index) {
; CHECK-LABEL: st1w_f32:
; CHECK: st1w { z0.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  ; Same encoding as the i32 case: st1w does not care about lane type.
  %ptr = getelementptr float, float* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %ptr)
  ret void
}
145
define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i32* %a, i64 %index) {
; CHECK-LABEL: st1w_d:
; CHECK: st1w { z0.d }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  ; Truncating store: doubleword lanes are narrowed to words before storing.
  %ptr = getelementptr i32, i32* %a, i64 %index
  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
  call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> %narrow, <vscale x 2 x i1> %pred, i32* %ptr)
  ret void
}
157
158;
159; ST1D
160;
161
define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %a, i64 %index) {
; CHECK-LABEL: st1d_i64:
; CHECK: st1d { z0.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  ; Full-width doubleword store; the index is scaled by lsl #3.
  %ptr = getelementptr i64, i64* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %ptr)
  ret void
}
172
define void @st1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %a, i64 %index) {
; CHECK-LABEL: st1d_f64:
; CHECK: st1d { z0.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  ; Same encoding as the i64 case: st1d does not care about lane type.
  %ptr = getelementptr double, double* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %ptr)
  ret void
}
183
; Intrinsic declarations, grouped by element count. Within each group the
; narrowest data type corresponds to a truncating store in the tests above;
; the widest is a full-width store.

; 16 lanes: full-width byte store.
declare void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)

; 8 lanes: i8 is truncating; i16/f16/bf16 are full-width.
declare void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i8*)
declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)

; 4 lanes: i8/i16 are truncating; i32/f32 are full-width.
declare void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*)
declare void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*)
declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)

; 2 lanes: i8/i16/i32 are truncating; i64/f64 are full-width.
declare void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*)
declare void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*)
declare void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*)
declare void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
201
202; +bf16 is required for the bfloat version.
203attributes #0 = { "target-features"="+sve,+bf16" }
204