// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>
6 
7 // CHECK-LABEL: @test_vld1q_f16(
8 // CHECK-NEXT:  entry:
9 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
10 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x half>, <8 x half>* [[TMP0]], align 2
11 // CHECK-NEXT:    ret <8 x half> [[TMP1]]
12 //
test_vld1q_f16(const float16_t * base)13 float16x8_t test_vld1q_f16(const float16_t *base)
14 {
15 #ifdef POLYMORPHIC
16     return vld1q(base);
17 #else /* POLYMORPHIC */
18     return vld1q_f16(base);
19 #endif /* POLYMORPHIC */
20 }
21 
22 // CHECK-LABEL: @test_vld1q_f32(
23 // CHECK-NEXT:  entry:
24 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
25 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
26 // CHECK-NEXT:    ret <4 x float> [[TMP1]]
27 //
test_vld1q_f32(const float32_t * base)28 float32x4_t test_vld1q_f32(const float32_t *base)
29 {
30 #ifdef POLYMORPHIC
31     return vld1q(base);
32 #else /* POLYMORPHIC */
33     return vld1q_f32(base);
34 #endif /* POLYMORPHIC */
35 }
36 
37 // CHECK-LABEL: @test_vld1q_s8(
38 // CHECK-NEXT:  entry:
39 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
40 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
41 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
42 //
test_vld1q_s8(const int8_t * base)43 int8x16_t test_vld1q_s8(const int8_t *base)
44 {
45 #ifdef POLYMORPHIC
46     return vld1q(base);
47 #else /* POLYMORPHIC */
48     return vld1q_s8(base);
49 #endif /* POLYMORPHIC */
50 }
51 
52 // CHECK-LABEL: @test_vld1q_s16(
53 // CHECK-NEXT:  entry:
54 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
55 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
56 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
57 //
test_vld1q_s16(const int16_t * base)58 int16x8_t test_vld1q_s16(const int16_t *base)
59 {
60 #ifdef POLYMORPHIC
61     return vld1q(base);
62 #else /* POLYMORPHIC */
63     return vld1q_s16(base);
64 #endif /* POLYMORPHIC */
65 }
66 
67 // CHECK-LABEL: @test_vld1q_s32(
68 // CHECK-NEXT:  entry:
69 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
70 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
71 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
72 //
test_vld1q_s32(const int32_t * base)73 int32x4_t test_vld1q_s32(const int32_t *base)
74 {
75 #ifdef POLYMORPHIC
76     return vld1q(base);
77 #else /* POLYMORPHIC */
78     return vld1q_s32(base);
79 #endif /* POLYMORPHIC */
80 }
81 
82 // CHECK-LABEL: @test_vld1q_u8(
83 // CHECK-NEXT:  entry:
84 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
85 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
86 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
87 //
test_vld1q_u8(const uint8_t * base)88 uint8x16_t test_vld1q_u8(const uint8_t *base)
89 {
90 #ifdef POLYMORPHIC
91     return vld1q(base);
92 #else /* POLYMORPHIC */
93     return vld1q_u8(base);
94 #endif /* POLYMORPHIC */
95 }
96 
97 // CHECK-LABEL: @test_vld1q_u16(
98 // CHECK-NEXT:  entry:
99 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
100 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
101 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
102 //
test_vld1q_u16(const uint16_t * base)103 uint16x8_t test_vld1q_u16(const uint16_t *base)
104 {
105 #ifdef POLYMORPHIC
106     return vld1q(base);
107 #else /* POLYMORPHIC */
108     return vld1q_u16(base);
109 #endif /* POLYMORPHIC */
110 }
111 
112 // CHECK-LABEL: @test_vld1q_u32(
113 // CHECK-NEXT:  entry:
114 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
115 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
116 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
117 //
test_vld1q_u32(const uint32_t * base)118 uint32x4_t test_vld1q_u32(const uint32_t *base)
119 {
120 #ifdef POLYMORPHIC
121     return vld1q(base);
122 #else /* POLYMORPHIC */
123     return vld1q_u32(base);
124 #endif /* POLYMORPHIC */
125 }
126 
127 // CHECK-LABEL: @test_vld1q_z_f16(
128 // CHECK-NEXT:  entry:
129 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
130 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
131 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
132 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* [[TMP0]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer)
133 // CHECK-NEXT:    ret <8 x half> [[TMP3]]
134 //
test_vld1q_z_f16(const float16_t * base,mve_pred16_t p)135 float16x8_t test_vld1q_z_f16(const float16_t *base, mve_pred16_t p)
136 {
137 #ifdef POLYMORPHIC
138     return vld1q_z(base, p);
139 #else /* POLYMORPHIC */
140     return vld1q_z_f16(base, p);
141 #endif /* POLYMORPHIC */
142 }
143 
144 // CHECK-LABEL: @test_vld1q_z_f32(
145 // CHECK-NEXT:  entry:
146 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
147 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
148 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
149 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP0]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer)
150 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
151 //
test_vld1q_z_f32(const float32_t * base,mve_pred16_t p)152 float32x4_t test_vld1q_z_f32(const float32_t *base, mve_pred16_t p)
153 {
154 #ifdef POLYMORPHIC
155     return vld1q_z(base, p);
156 #else /* POLYMORPHIC */
157     return vld1q_z_f32(base, p);
158 #endif /* POLYMORPHIC */
159 }
160 
161 // CHECK-LABEL: @test_vld1q_z_s8(
162 // CHECK-NEXT:  entry:
163 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
164 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
165 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
166 // CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
167 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
168 //
test_vld1q_z_s8(const int8_t * base,mve_pred16_t p)169 int8x16_t test_vld1q_z_s8(const int8_t *base, mve_pred16_t p)
170 {
171 #ifdef POLYMORPHIC
172     return vld1q_z(base, p);
173 #else /* POLYMORPHIC */
174     return vld1q_z_s8(base, p);
175 #endif /* POLYMORPHIC */
176 }
177 
178 // CHECK-LABEL: @test_vld1q_z_s16(
179 // CHECK-NEXT:  entry:
180 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
181 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
182 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
183 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
184 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
185 //
test_vld1q_z_s16(const int16_t * base,mve_pred16_t p)186 int16x8_t test_vld1q_z_s16(const int16_t *base, mve_pred16_t p)
187 {
188 #ifdef POLYMORPHIC
189     return vld1q_z(base, p);
190 #else /* POLYMORPHIC */
191     return vld1q_z_s16(base, p);
192 #endif /* POLYMORPHIC */
193 }
194 
195 // CHECK-LABEL: @test_vld1q_z_s32(
196 // CHECK-NEXT:  entry:
197 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
198 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
199 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
200 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
201 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
202 //
test_vld1q_z_s32(const int32_t * base,mve_pred16_t p)203 int32x4_t test_vld1q_z_s32(const int32_t *base, mve_pred16_t p)
204 {
205 #ifdef POLYMORPHIC
206     return vld1q_z(base, p);
207 #else /* POLYMORPHIC */
208     return vld1q_z_s32(base, p);
209 #endif /* POLYMORPHIC */
210 }
211 
212 // CHECK-LABEL: @test_vld1q_z_u8(
213 // CHECK-NEXT:  entry:
214 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
215 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
216 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
217 // CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
218 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
219 //
test_vld1q_z_u8(const uint8_t * base,mve_pred16_t p)220 uint8x16_t test_vld1q_z_u8(const uint8_t *base, mve_pred16_t p)
221 {
222 #ifdef POLYMORPHIC
223     return vld1q_z(base, p);
224 #else /* POLYMORPHIC */
225     return vld1q_z_u8(base, p);
226 #endif /* POLYMORPHIC */
227 }
228 
229 // CHECK-LABEL: @test_vld1q_z_u16(
230 // CHECK-NEXT:  entry:
231 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
232 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
233 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
234 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
235 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
236 //
test_vld1q_z_u16(const uint16_t * base,mve_pred16_t p)237 uint16x8_t test_vld1q_z_u16(const uint16_t *base, mve_pred16_t p)
238 {
239 #ifdef POLYMORPHIC
240     return vld1q_z(base, p);
241 #else /* POLYMORPHIC */
242     return vld1q_z_u16(base, p);
243 #endif /* POLYMORPHIC */
244 }
245 
246 // CHECK-LABEL: @test_vld1q_z_u32(
247 // CHECK-NEXT:  entry:
248 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
249 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
250 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
251 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
252 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
253 //
test_vld1q_z_u32(const uint32_t * base,mve_pred16_t p)254 uint32x4_t test_vld1q_z_u32(const uint32_t *base, mve_pred16_t p)
255 {
256 #ifdef POLYMORPHIC
257     return vld1q_z(base, p);
258 #else /* POLYMORPHIC */
259     return vld1q_z_u32(base, p);
260 #endif /* POLYMORPHIC */
261 }
262 
263 // CHECK-LABEL: @test_vldrbq_s8(
264 // CHECK-NEXT:  entry:
265 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
266 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
267 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
268 //
test_vldrbq_s8(const int8_t * base)269 int8x16_t test_vldrbq_s8(const int8_t *base)
270 {
271     return vldrbq_s8(base);
272 }
273 
274 // CHECK-LABEL: @test_vldrbq_s16(
275 // CHECK-NEXT:  entry:
276 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
277 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
278 // CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
279 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
280 //
test_vldrbq_s16(const int8_t * base)281 int16x8_t test_vldrbq_s16(const int8_t *base)
282 {
283     return vldrbq_s16(base);
284 }
285 
286 // CHECK-LABEL: @test_vldrbq_s32(
287 // CHECK-NEXT:  entry:
288 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
289 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
290 // CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
291 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
292 //
test_vldrbq_s32(const int8_t * base)293 int32x4_t test_vldrbq_s32(const int8_t *base)
294 {
295     return vldrbq_s32(base);
296 }
297 
298 // CHECK-LABEL: @test_vldrbq_u8(
299 // CHECK-NEXT:  entry:
300 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
301 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
302 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
303 //
test_vldrbq_u8(const uint8_t * base)304 uint8x16_t test_vldrbq_u8(const uint8_t *base)
305 {
306     return vldrbq_u8(base);
307 }
308 
309 // CHECK-LABEL: @test_vldrbq_u16(
310 // CHECK-NEXT:  entry:
311 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
312 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
313 // CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
314 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
315 //
test_vldrbq_u16(const uint8_t * base)316 uint16x8_t test_vldrbq_u16(const uint8_t *base)
317 {
318     return vldrbq_u16(base);
319 }
320 
321 // CHECK-LABEL: @test_vldrbq_u32(
322 // CHECK-NEXT:  entry:
323 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
324 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
325 // CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
326 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
327 //
test_vldrbq_u32(const uint8_t * base)328 uint32x4_t test_vldrbq_u32(const uint8_t *base)
329 {
330     return vldrbq_u32(base);
331 }
332 
333 // CHECK-LABEL: @test_vldrbq_z_s8(
334 // CHECK-NEXT:  entry:
335 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
336 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
337 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
338 // CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
339 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
340 //
test_vldrbq_z_s8(const int8_t * base,mve_pred16_t p)341 int8x16_t test_vldrbq_z_s8(const int8_t *base, mve_pred16_t p)
342 {
343     return vldrbq_z_s8(base, p);
344 }
345 
346 // CHECK-LABEL: @test_vldrbq_z_s16(
347 // CHECK-NEXT:  entry:
348 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
349 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
350 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
351 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP0]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer)
352 // CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i8> [[TMP3]] to <8 x i16>
353 // CHECK-NEXT:    ret <8 x i16> [[TMP4]]
354 //
test_vldrbq_z_s16(const int8_t * base,mve_pred16_t p)355 int16x8_t test_vldrbq_z_s16(const int8_t *base, mve_pred16_t p)
356 {
357     return vldrbq_z_s16(base, p);
358 }
359 
360 // CHECK-LABEL: @test_vldrbq_z_s32(
361 // CHECK-NEXT:  entry:
362 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
363 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
364 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
365 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP0]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer)
366 // CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32>
367 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
368 //
test_vldrbq_z_s32(const int8_t * base,mve_pred16_t p)369 int32x4_t test_vldrbq_z_s32(const int8_t *base, mve_pred16_t p)
370 {
371     return vldrbq_z_s32(base, p);
372 }
373 
374 // CHECK-LABEL: @test_vldrbq_z_u8(
375 // CHECK-NEXT:  entry:
376 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
377 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
378 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
379 // CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
380 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
381 //
test_vldrbq_z_u8(const uint8_t * base,mve_pred16_t p)382 uint8x16_t test_vldrbq_z_u8(const uint8_t *base, mve_pred16_t p)
383 {
384     return vldrbq_z_u8(base, p);
385 }
386 
387 // CHECK-LABEL: @test_vldrbq_z_u16(
388 // CHECK-NEXT:  entry:
389 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
390 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
391 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
392 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* [[TMP0]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer)
393 // CHECK-NEXT:    [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16>
394 // CHECK-NEXT:    ret <8 x i16> [[TMP4]]
395 //
test_vldrbq_z_u16(const uint8_t * base,mve_pred16_t p)396 uint16x8_t test_vldrbq_z_u16(const uint8_t *base, mve_pred16_t p)
397 {
398     return vldrbq_z_u16(base, p);
399 }
400 
401 // CHECK-LABEL: @test_vldrbq_z_u32(
402 // CHECK-NEXT:  entry:
403 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
404 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
405 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
406 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* [[TMP0]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer)
407 // CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
408 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
409 //
test_vldrbq_z_u32(const uint8_t * base,mve_pred16_t p)410 uint32x4_t test_vldrbq_z_u32(const uint8_t *base, mve_pred16_t p)
411 {
412     return vldrbq_z_u32(base, p);
413 }
414 
415 // CHECK-LABEL: @test_vldrhq_f16(
416 // CHECK-NEXT:  entry:
417 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
418 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x half>, <8 x half>* [[TMP0]], align 2
419 // CHECK-NEXT:    ret <8 x half> [[TMP1]]
420 //
test_vldrhq_f16(const float16_t * base)421 float16x8_t test_vldrhq_f16(const float16_t *base)
422 {
423     return vldrhq_f16(base);
424 }
425 
426 // CHECK-LABEL: @test_vldrhq_s16(
427 // CHECK-NEXT:  entry:
428 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
429 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
430 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
431 //
test_vldrhq_s16(const int16_t * base)432 int16x8_t test_vldrhq_s16(const int16_t *base)
433 {
434     return vldrhq_s16(base);
435 }
436 
437 // CHECK-LABEL: @test_vldrhq_s32(
438 // CHECK-NEXT:  entry:
439 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
440 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
441 // CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
442 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
443 //
test_vldrhq_s32(const int16_t * base)444 int32x4_t test_vldrhq_s32(const int16_t *base)
445 {
446     return vldrhq_s32(base);
447 }
448 
449 // CHECK-LABEL: @test_vldrhq_u16(
450 // CHECK-NEXT:  entry:
451 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
452 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
453 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
454 //
test_vldrhq_u16(const uint16_t * base)455 uint16x8_t test_vldrhq_u16(const uint16_t *base)
456 {
457     return vldrhq_u16(base);
458 }
459 
460 // CHECK-LABEL: @test_vldrhq_u32(
461 // CHECK-NEXT:  entry:
462 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
463 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
464 // CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
465 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
466 //
test_vldrhq_u32(const uint16_t * base)467 uint32x4_t test_vldrhq_u32(const uint16_t *base)
468 {
469     return vldrhq_u32(base);
470 }
471 
472 // CHECK-LABEL: @test_vldrhq_z_f16(
473 // CHECK-NEXT:  entry:
474 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
475 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
476 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
477 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* [[TMP0]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer)
478 // CHECK-NEXT:    ret <8 x half> [[TMP3]]
479 //
test_vldrhq_z_f16(const float16_t * base,mve_pred16_t p)480 float16x8_t test_vldrhq_z_f16(const float16_t *base, mve_pred16_t p)
481 {
482     return vldrhq_z_f16(base, p);
483 }
484 
485 // CHECK-LABEL: @test_vldrhq_z_s16(
486 // CHECK-NEXT:  entry:
487 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
488 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
489 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
490 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
491 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
492 //
test_vldrhq_z_s16(const int16_t * base,mve_pred16_t p)493 int16x8_t test_vldrhq_z_s16(const int16_t *base, mve_pred16_t p)
494 {
495     return vldrhq_z_s16(base, p);
496 }
497 
498 // CHECK-LABEL: @test_vldrhq_z_s32(
499 // CHECK-NEXT:  entry:
500 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
501 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
502 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
503 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP0]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer)
504 // CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
505 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
506 //
test_vldrhq_z_s32(const int16_t * base,mve_pred16_t p)507 int32x4_t test_vldrhq_z_s32(const int16_t *base, mve_pred16_t p)
508 {
509     return vldrhq_z_s32(base, p);
510 }
511 
512 // CHECK-LABEL: @test_vldrhq_z_u16(
513 // CHECK-NEXT:  entry:
514 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
515 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
516 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
517 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
518 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
519 //
test_vldrhq_z_u16(const uint16_t * base,mve_pred16_t p)520 uint16x8_t test_vldrhq_z_u16(const uint16_t *base, mve_pred16_t p)
521 {
522     return vldrhq_z_u16(base, p);
523 }
524 
525 // CHECK-LABEL: @test_vldrhq_z_u32(
526 // CHECK-NEXT:  entry:
527 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
528 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
529 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
530 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP0]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer)
531 // CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
532 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
533 //
test_vldrhq_z_u32(const uint16_t * base,mve_pred16_t p)534 uint32x4_t test_vldrhq_z_u32(const uint16_t *base, mve_pred16_t p)
535 {
536     return vldrhq_z_u32(base, p);
537 }
538 
539 // CHECK-LABEL: @test_vldrwq_f32(
540 // CHECK-NEXT:  entry:
541 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
542 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
543 // CHECK-NEXT:    ret <4 x float> [[TMP1]]
544 //
test_vldrwq_f32(const float32_t * base)545 float32x4_t test_vldrwq_f32(const float32_t *base)
546 {
547     return vldrwq_f32(base);
548 }
549 
550 // CHECK-LABEL: @test_vldrwq_s32(
551 // CHECK-NEXT:  entry:
552 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
553 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
554 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
555 //
test_vldrwq_s32(const int32_t * base)556 int32x4_t test_vldrwq_s32(const int32_t *base)
557 {
558     return vldrwq_s32(base);
559 }
560 
561 // CHECK-LABEL: @test_vldrwq_u32(
562 // CHECK-NEXT:  entry:
563 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
564 // CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
565 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
566 //
test_vldrwq_u32(const uint32_t * base)567 uint32x4_t test_vldrwq_u32(const uint32_t *base)
568 {
569     return vldrwq_u32(base);
570 }
571 
572 // CHECK-LABEL: @test_vldrwq_z_f32(
573 // CHECK-NEXT:  entry:
574 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
575 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
576 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
577 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP0]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer)
578 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
579 //
test_vldrwq_z_f32(const float32_t * base,mve_pred16_t p)580 float32x4_t test_vldrwq_z_f32(const float32_t *base, mve_pred16_t p)
581 {
582     return vldrwq_z_f32(base, p);
583 }
584 
585 // CHECK-LABEL: @test_vldrwq_z_s32(
586 // CHECK-NEXT:  entry:
587 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
588 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
589 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
590 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
591 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
592 //
test_vldrwq_z_s32(const int32_t * base,mve_pred16_t p)593 int32x4_t test_vldrwq_z_s32(const int32_t *base, mve_pred16_t p)
594 {
595     return vldrwq_z_s32(base, p);
596 }
597 
598 // CHECK-LABEL: @test_vldrwq_z_u32(
599 // CHECK-NEXT:  entry:
600 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
601 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
602 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
603 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
604 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
605 //
test_vldrwq_z_u32(const uint32_t * base,mve_pred16_t p)606 uint32x4_t test_vldrwq_z_u32(const uint32_t *base, mve_pred16_t p)
607 {
608     return vldrwq_z_u32(base, p);
609 }
610 
611 // CHECK-LABEL: @test_vst1q_f16(
612 // CHECK-NEXT:  entry:
613 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
614 // CHECK-NEXT:    store <8 x half> [[VALUE:%.*]], <8 x half>* [[TMP0]], align 2
615 // CHECK-NEXT:    ret void
616 //
test_vst1q_f16(float16_t * base,float16x8_t value)617 void test_vst1q_f16(float16_t *base, float16x8_t value)
618 {
619 #ifdef POLYMORPHIC
620     vst1q(base, value);
621 #else /* POLYMORPHIC */
622     vst1q_f16(base, value);
623 #endif /* POLYMORPHIC */
624 }
625 
626 // CHECK-LABEL: @test_vst1q_f32(
627 // CHECK-NEXT:  entry:
628 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
629 // CHECK-NEXT:    store <4 x float> [[VALUE:%.*]], <4 x float>* [[TMP0]], align 4
630 // CHECK-NEXT:    ret void
631 //
test_vst1q_f32(float32_t * base,float32x4_t value)632 void test_vst1q_f32(float32_t *base, float32x4_t value)
633 {
634 #ifdef POLYMORPHIC
635     vst1q(base, value);
636 #else /* POLYMORPHIC */
637     vst1q_f32(base, value);
638 #endif /* POLYMORPHIC */
639 }
640 
641 // CHECK-LABEL: @test_vst1q_s8(
642 // CHECK-NEXT:  entry:
643 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
644 // CHECK-NEXT:    store <16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], align 1
645 // CHECK-NEXT:    ret void
646 //
test_vst1q_s8(int8_t * base,int8x16_t value)647 void test_vst1q_s8(int8_t *base, int8x16_t value)
648 {
649 #ifdef POLYMORPHIC
650     vst1q(base, value);
651 #else /* POLYMORPHIC */
652     vst1q_s8(base, value);
653 #endif /* POLYMORPHIC */
654 }
655 
656 // CHECK-LABEL: @test_vst1q_s16(
657 // CHECK-NEXT:  entry:
658 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
659 // CHECK-NEXT:    store <8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], align 2
660 // CHECK-NEXT:    ret void
661 //
test_vst1q_s16(int16_t * base,int16x8_t value)662 void test_vst1q_s16(int16_t *base, int16x8_t value)
663 {
664 #ifdef POLYMORPHIC
665     vst1q(base, value);
666 #else /* POLYMORPHIC */
667     vst1q_s16(base, value);
668 #endif /* POLYMORPHIC */
669 }
670 
671 // CHECK-LABEL: @test_vst1q_s32(
672 // CHECK-NEXT:  entry:
673 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
674 // CHECK-NEXT:    store <4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], align 4
675 // CHECK-NEXT:    ret void
676 //
test_vst1q_s32(int32_t * base,int32x4_t value)677 void test_vst1q_s32(int32_t *base, int32x4_t value)
678 {
679 #ifdef POLYMORPHIC
680     vst1q(base, value);
681 #else /* POLYMORPHIC */
682     vst1q_s32(base, value);
683 #endif /* POLYMORPHIC */
684 }
685 
686 // CHECK-LABEL: @test_vst1q_u8(
687 // CHECK-NEXT:  entry:
688 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
689 // CHECK-NEXT:    store <16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], align 1
690 // CHECK-NEXT:    ret void
691 //
test_vst1q_u8(uint8_t * base,uint8x16_t value)692 void test_vst1q_u8(uint8_t *base, uint8x16_t value)
693 {
694 #ifdef POLYMORPHIC
695     vst1q(base, value);
696 #else /* POLYMORPHIC */
697     vst1q_u8(base, value);
698 #endif /* POLYMORPHIC */
699 }
700 
701 // CHECK-LABEL: @test_vst1q_u16(
702 // CHECK-NEXT:  entry:
703 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
704 // CHECK-NEXT:    store <8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], align 2
705 // CHECK-NEXT:    ret void
706 //
test_vst1q_u16(uint16_t * base,uint16x8_t value)707 void test_vst1q_u16(uint16_t *base, uint16x8_t value)
708 {
709 #ifdef POLYMORPHIC
710     vst1q(base, value);
711 #else /* POLYMORPHIC */
712     vst1q_u16(base, value);
713 #endif /* POLYMORPHIC */
714 }
715 
716 // CHECK-LABEL: @test_vst1q_u32(
717 // CHECK-NEXT:  entry:
718 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
719 // CHECK-NEXT:    store <4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], align 4
720 // CHECK-NEXT:    ret void
721 //
test_vst1q_u32(uint32_t * base,uint32x4_t value)722 void test_vst1q_u32(uint32_t *base, uint32x4_t value)
723 {
724 #ifdef POLYMORPHIC
725     vst1q(base, value);
726 #else /* POLYMORPHIC */
727     vst1q_u32(base, value);
728 #endif /* POLYMORPHIC */
729 }
730 
731 // CHECK-LABEL: @test_vst1q_p_f16(
732 // CHECK-NEXT:  entry:
733 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
734 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
735 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
736 // CHECK-NEXT:    call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> [[VALUE:%.*]], <8 x half>* [[TMP0]], i32 2, <8 x i1> [[TMP2]])
737 // CHECK-NEXT:    ret void
738 //
// Predicated vst1q_p on f16: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_f16(float16_t * base,float16x8_t value,mve_pred16_t p)739 void test_vst1q_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p)
740 {
741 #ifdef POLYMORPHIC
742     vst1q_p(base, value, p);
743 #else /* POLYMORPHIC */
744     vst1q_p_f16(base, value, p);
745 #endif /* POLYMORPHIC */
746 }
747 
748 // CHECK-LABEL: @test_vst1q_p_f32(
749 // CHECK-NEXT:  entry:
750 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
751 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
752 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
753 // CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> [[VALUE:%.*]], <4 x float>* [[TMP0]], i32 4, <4 x i1> [[TMP2]])
754 // CHECK-NEXT:    ret void
755 //
// Predicated vst1q_p on f32: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_f32(float32_t * base,float32x4_t value,mve_pred16_t p)756 void test_vst1q_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p)
757 {
758 #ifdef POLYMORPHIC
759     vst1q_p(base, value, p);
760 #else /* POLYMORPHIC */
761     vst1q_p_f32(base, value, p);
762 #endif /* POLYMORPHIC */
763 }
764 
765 // CHECK-LABEL: @test_vst1q_p_s8(
766 // CHECK-NEXT:  entry:
767 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
768 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
769 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
770 // CHECK-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]])
771 // CHECK-NEXT:    ret void
772 //
// Predicated vst1q_p on s8: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_s8(int8_t * base,int8x16_t value,mve_pred16_t p)773 void test_vst1q_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p)
774 {
775 #ifdef POLYMORPHIC
776     vst1q_p(base, value, p);
777 #else /* POLYMORPHIC */
778     vst1q_p_s8(base, value, p);
779 #endif /* POLYMORPHIC */
780 }
781 
782 // CHECK-LABEL: @test_vst1q_p_s16(
783 // CHECK-NEXT:  entry:
784 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
785 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
786 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
787 // CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]])
788 // CHECK-NEXT:    ret void
789 //
// Predicated vst1q_p on s16: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_s16(int16_t * base,int16x8_t value,mve_pred16_t p)790 void test_vst1q_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p)
791 {
792 #ifdef POLYMORPHIC
793     vst1q_p(base, value, p);
794 #else /* POLYMORPHIC */
795     vst1q_p_s16(base, value, p);
796 #endif /* POLYMORPHIC */
797 }
798 
799 // CHECK-LABEL: @test_vst1q_p_s32(
800 // CHECK-NEXT:  entry:
801 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
802 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
803 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
804 // CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]])
805 // CHECK-NEXT:    ret void
806 //
// Predicated vst1q_p on s32: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_s32(int32_t * base,int32x4_t value,mve_pred16_t p)807 void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p)
808 {
809 #ifdef POLYMORPHIC
810     vst1q_p(base, value, p);
811 #else /* POLYMORPHIC */
812     vst1q_p_s32(base, value, p);
813 #endif /* POLYMORPHIC */
814 }
815 
816 // CHECK-LABEL: @test_vst1q_p_u8(
817 // CHECK-NEXT:  entry:
818 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
819 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
820 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
821 // CHECK-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]])
822 // CHECK-NEXT:    ret void
823 //
// Predicated vst1q_p on u8: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_u8(uint8_t * base,uint8x16_t value,mve_pred16_t p)824 void test_vst1q_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p)
825 {
826 #ifdef POLYMORPHIC
827     vst1q_p(base, value, p);
828 #else /* POLYMORPHIC */
829     vst1q_p_u8(base, value, p);
830 #endif /* POLYMORPHIC */
831 }
832 
833 // CHECK-LABEL: @test_vst1q_p_u16(
834 // CHECK-NEXT:  entry:
835 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
836 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
837 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
838 // CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]])
839 // CHECK-NEXT:    ret void
840 //
// Predicated vst1q_p on u16: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_u16(uint16_t * base,uint16x8_t value,mve_pred16_t p)841 void test_vst1q_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p)
842 {
843 #ifdef POLYMORPHIC
844     vst1q_p(base, value, p);
845 #else /* POLYMORPHIC */
846     vst1q_p_u16(base, value, p);
847 #endif /* POLYMORPHIC */
848 }
849 
850 // CHECK-LABEL: @test_vst1q_p_u32(
851 // CHECK-NEXT:  entry:
852 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
853 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
854 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
855 // CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]])
856 // CHECK-NEXT:    ret void
857 //
// Predicated vst1q_p on u32: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vst1q_p_u32(uint32_t * base,uint32x4_t value,mve_pred16_t p)858 void test_vst1q_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p)
859 {
860 #ifdef POLYMORPHIC
861     vst1q_p(base, value, p);
862 #else /* POLYMORPHIC */
863     vst1q_p_u32(base, value, p);
864 #endif /* POLYMORPHIC */
865 }
866 
867 // CHECK-LABEL: @test_vstrbq_s8(
868 // CHECK-NEXT:  entry:
869 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
870 // CHECK-NEXT:    store <16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], align 1
871 // CHECK-NEXT:    ret void
872 //
// Unpredicated vstrbq on s8 (no narrowing): must lower to a plain <16 x i8> store (see CHECK lines above).
test_vstrbq_s8(int8_t * base,int8x16_t value)873 void test_vstrbq_s8(int8_t *base, int8x16_t value)
874 {
875 #ifdef POLYMORPHIC
876     vstrbq(base, value);
877 #else /* POLYMORPHIC */
878     vstrbq_s8(base, value);
879 #endif /* POLYMORPHIC */
880 }
881 
882 // CHECK-LABEL: @test_vstrbq_s16(
883 // CHECK-NEXT:  entry:
884 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8>
885 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
886 // CHECK-NEXT:    store <8 x i8> [[TMP0]], <8 x i8>* [[TMP1]], align 1
887 // CHECK-NEXT:    ret void
888 //
// Narrowing vstrbq from s16: value lanes must be trunc'd to i8 before the store (see CHECK lines above).
test_vstrbq_s16(int8_t * base,int16x8_t value)889 void test_vstrbq_s16(int8_t *base, int16x8_t value)
890 {
891 #ifdef POLYMORPHIC
892     vstrbq(base, value);
893 #else /* POLYMORPHIC */
894     vstrbq_s16(base, value);
895 #endif /* POLYMORPHIC */
896 }
897 
898 // CHECK-LABEL: @test_vstrbq_s32(
899 // CHECK-NEXT:  entry:
900 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8>
901 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
902 // CHECK-NEXT:    store <4 x i8> [[TMP0]], <4 x i8>* [[TMP1]], align 1
903 // CHECK-NEXT:    ret void
904 //
// Narrowing vstrbq from s32: value lanes must be trunc'd to i8 before the store (see CHECK lines above).
test_vstrbq_s32(int8_t * base,int32x4_t value)905 void test_vstrbq_s32(int8_t *base, int32x4_t value)
906 {
907 #ifdef POLYMORPHIC
908     vstrbq(base, value);
909 #else /* POLYMORPHIC */
910     vstrbq_s32(base, value);
911 #endif /* POLYMORPHIC */
912 }
913 
914 // CHECK-LABEL: @test_vstrbq_u8(
915 // CHECK-NEXT:  entry:
916 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
917 // CHECK-NEXT:    store <16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], align 1
918 // CHECK-NEXT:    ret void
919 //
// Unpredicated vstrbq on u8 (no narrowing): must lower to a plain <16 x i8> store (see CHECK lines above).
test_vstrbq_u8(uint8_t * base,uint8x16_t value)920 void test_vstrbq_u8(uint8_t *base, uint8x16_t value)
921 {
922 #ifdef POLYMORPHIC
923     vstrbq(base, value);
924 #else /* POLYMORPHIC */
925     vstrbq_u8(base, value);
926 #endif /* POLYMORPHIC */
927 }
928 
929 // CHECK-LABEL: @test_vstrbq_u16(
930 // CHECK-NEXT:  entry:
931 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8>
932 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
933 // CHECK-NEXT:    store <8 x i8> [[TMP0]], <8 x i8>* [[TMP1]], align 1
934 // CHECK-NEXT:    ret void
935 //
// Narrowing vstrbq from u16: value lanes must be trunc'd to i8 before the store (see CHECK lines above).
test_vstrbq_u16(uint8_t * base,uint16x8_t value)936 void test_vstrbq_u16(uint8_t *base, uint16x8_t value)
937 {
938 #ifdef POLYMORPHIC
939     vstrbq(base, value);
940 #else /* POLYMORPHIC */
941     vstrbq_u16(base, value);
942 #endif /* POLYMORPHIC */
943 }
944 
945 // CHECK-LABEL: @test_vstrbq_u32(
946 // CHECK-NEXT:  entry:
947 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8>
948 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
949 // CHECK-NEXT:    store <4 x i8> [[TMP0]], <4 x i8>* [[TMP1]], align 1
950 // CHECK-NEXT:    ret void
951 //
// Narrowing vstrbq from u32: value lanes must be trunc'd to i8 before the store (see CHECK lines above).
test_vstrbq_u32(uint8_t * base,uint32x4_t value)952 void test_vstrbq_u32(uint8_t *base, uint32x4_t value)
953 {
954 #ifdef POLYMORPHIC
955     vstrbq(base, value);
956 #else /* POLYMORPHIC */
957     vstrbq_u32(base, value);
958 #endif /* POLYMORPHIC */
959 }
960 
961 // CHECK-LABEL: @test_vstrbq_p_s8(
962 // CHECK-NEXT:  entry:
963 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
964 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
965 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
966 // CHECK-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]])
967 // CHECK-NEXT:    ret void
968 //
// Predicated vstrbq_p on s8: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrbq_p_s8(int8_t * base,int8x16_t value,mve_pred16_t p)969 void test_vstrbq_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p)
970 {
971 #ifdef POLYMORPHIC
972     vstrbq_p(base, value, p);
973 #else /* POLYMORPHIC */
974     vstrbq_p_s8(base, value, p);
975 #endif /* POLYMORPHIC */
976 }
977 
978 // CHECK-LABEL: @test_vstrbq_p_s16(
979 // CHECK-NEXT:  entry:
980 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8>
981 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
982 // CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
983 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
984 // CHECK-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP0]], <8 x i8>* [[TMP1]], i32 1, <8 x i1> [[TMP3]])
985 // CHECK-NEXT:    ret void
986 //
// Predicated narrowing vstrbq_p from s16: trunc to i8 lanes plus @llvm.masked.store (see CHECK lines above).
test_vstrbq_p_s16(int8_t * base,int16x8_t value,mve_pred16_t p)987 void test_vstrbq_p_s16(int8_t *base, int16x8_t value, mve_pred16_t p)
988 {
989 #ifdef POLYMORPHIC
990     vstrbq_p(base, value, p);
991 #else /* POLYMORPHIC */
992     vstrbq_p_s16(base, value, p);
993 #endif /* POLYMORPHIC */
994 }
995 
996 // CHECK-LABEL: @test_vstrbq_p_s32(
997 // CHECK-NEXT:  entry:
998 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8>
999 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
1000 // CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
1001 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
1002 // CHECK-NEXT:    call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> [[TMP0]], <4 x i8>* [[TMP1]], i32 1, <4 x i1> [[TMP3]])
1003 // CHECK-NEXT:    ret void
1004 //
// Predicated narrowing vstrbq_p from s32: trunc to i8 lanes plus @llvm.masked.store (see CHECK lines above).
test_vstrbq_p_s32(int8_t * base,int32x4_t value,mve_pred16_t p)1005 void test_vstrbq_p_s32(int8_t *base, int32x4_t value, mve_pred16_t p)
1006 {
1007 #ifdef POLYMORPHIC
1008     vstrbq_p(base, value, p);
1009 #else /* POLYMORPHIC */
1010     vstrbq_p_s32(base, value, p);
1011 #endif /* POLYMORPHIC */
1012 }
1013 
1014 // CHECK-LABEL: @test_vstrbq_p_u8(
1015 // CHECK-NEXT:  entry:
1016 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <16 x i8>*
1017 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1018 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1019 // CHECK-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[VALUE:%.*]], <16 x i8>* [[TMP0]], i32 1, <16 x i1> [[TMP2]])
1020 // CHECK-NEXT:    ret void
1021 //
// Predicated vstrbq_p on u8: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrbq_p_u8(uint8_t * base,uint8x16_t value,mve_pred16_t p)1022 void test_vstrbq_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p)
1023 {
1024 #ifdef POLYMORPHIC
1025     vstrbq_p(base, value, p);
1026 #else /* POLYMORPHIC */
1027     vstrbq_p_u8(base, value, p);
1028 #endif /* POLYMORPHIC */
1029 }
1030 
1031 // CHECK-LABEL: @test_vstrbq_p_u16(
1032 // CHECK-NEXT:  entry:
1033 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8>
1034 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <8 x i8>*
1035 // CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
1036 // CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
1037 // CHECK-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP0]], <8 x i8>* [[TMP1]], i32 1, <8 x i1> [[TMP3]])
1038 // CHECK-NEXT:    ret void
1039 //
// Predicated narrowing vstrbq_p from u16: trunc to i8 lanes plus @llvm.masked.store (see CHECK lines above).
test_vstrbq_p_u16(uint8_t * base,uint16x8_t value,mve_pred16_t p)1040 void test_vstrbq_p_u16(uint8_t *base, uint16x8_t value, mve_pred16_t p)
1041 {
1042 #ifdef POLYMORPHIC
1043     vstrbq_p(base, value, p);
1044 #else /* POLYMORPHIC */
1045     vstrbq_p_u16(base, value, p);
1046 #endif /* POLYMORPHIC */
1047 }
1048 
1049 // CHECK-LABEL: @test_vstrbq_p_u32(
1050 // CHECK-NEXT:  entry:
1051 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8>
1052 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <4 x i8>*
1053 // CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
1054 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
1055 // CHECK-NEXT:    call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> [[TMP0]], <4 x i8>* [[TMP1]], i32 1, <4 x i1> [[TMP3]])
1056 // CHECK-NEXT:    ret void
1057 //
// Predicated narrowing vstrbq_p from u32: trunc to i8 lanes plus @llvm.masked.store (see CHECK lines above).
test_vstrbq_p_u32(uint8_t * base,uint32x4_t value,mve_pred16_t p)1058 void test_vstrbq_p_u32(uint8_t *base, uint32x4_t value, mve_pred16_t p)
1059 {
1060 #ifdef POLYMORPHIC
1061     vstrbq_p(base, value, p);
1062 #else /* POLYMORPHIC */
1063     vstrbq_p_u32(base, value, p);
1064 #endif /* POLYMORPHIC */
1065 }
1066 
1067 // CHECK-LABEL: @test_vstrhq_f16(
1068 // CHECK-NEXT:  entry:
1069 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
1070 // CHECK-NEXT:    store <8 x half> [[VALUE:%.*]], <8 x half>* [[TMP0]], align 2
1071 // CHECK-NEXT:    ret void
1072 //
// Unpredicated vstrhq on f16 (no narrowing): must lower to a plain <8 x half> store (see CHECK lines above).
test_vstrhq_f16(float16_t * base,float16x8_t value)1073 void test_vstrhq_f16(float16_t *base, float16x8_t value)
1074 {
1075 #ifdef POLYMORPHIC
1076     vstrhq(base, value);
1077 #else /* POLYMORPHIC */
1078     vstrhq_f16(base, value);
1079 #endif /* POLYMORPHIC */
1080 }
1081 
1082 // CHECK-LABEL: @test_vstrhq_s16(
1083 // CHECK-NEXT:  entry:
1084 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
1085 // CHECK-NEXT:    store <8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], align 2
1086 // CHECK-NEXT:    ret void
1087 //
// Unpredicated vstrhq on s16 (no narrowing): must lower to a plain <8 x i16> store (see CHECK lines above).
test_vstrhq_s16(int16_t * base,int16x8_t value)1088 void test_vstrhq_s16(int16_t *base, int16x8_t value)
1089 {
1090 #ifdef POLYMORPHIC
1091     vstrhq(base, value);
1092 #else /* POLYMORPHIC */
1093     vstrhq_s16(base, value);
1094 #endif /* POLYMORPHIC */
1095 }
1096 
1097 // CHECK-LABEL: @test_vstrhq_s32(
1098 // CHECK-NEXT:  entry:
1099 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16>
1100 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
1101 // CHECK-NEXT:    store <4 x i16> [[TMP0]], <4 x i16>* [[TMP1]], align 2
1102 // CHECK-NEXT:    ret void
1103 //
// Narrowing vstrhq from s32: value lanes must be trunc'd to i16 before the store (see CHECK lines above).
test_vstrhq_s32(int16_t * base,int32x4_t value)1104 void test_vstrhq_s32(int16_t *base, int32x4_t value)
1105 {
1106 #ifdef POLYMORPHIC
1107     vstrhq(base, value);
1108 #else /* POLYMORPHIC */
1109     vstrhq_s32(base, value);
1110 #endif /* POLYMORPHIC */
1111 }
1112 
1113 // CHECK-LABEL: @test_vstrhq_u16(
1114 // CHECK-NEXT:  entry:
1115 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
1116 // CHECK-NEXT:    store <8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], align 2
1117 // CHECK-NEXT:    ret void
1118 //
// Unpredicated vstrhq on u16 (no narrowing): must lower to a plain <8 x i16> store (see CHECK lines above).
test_vstrhq_u16(uint16_t * base,uint16x8_t value)1119 void test_vstrhq_u16(uint16_t *base, uint16x8_t value)
1120 {
1121 #ifdef POLYMORPHIC
1122     vstrhq(base, value);
1123 #else /* POLYMORPHIC */
1124     vstrhq_u16(base, value);
1125 #endif /* POLYMORPHIC */
1126 }
1127 
1128 // CHECK-LABEL: @test_vstrhq_u32(
1129 // CHECK-NEXT:  entry:
1130 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16>
1131 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
1132 // CHECK-NEXT:    store <4 x i16> [[TMP0]], <4 x i16>* [[TMP1]], align 2
1133 // CHECK-NEXT:    ret void
1134 //
// Narrowing vstrhq from u32: value lanes must be trunc'd to i16 before the store (see CHECK lines above).
test_vstrhq_u32(uint16_t * base,uint32x4_t value)1135 void test_vstrhq_u32(uint16_t *base, uint32x4_t value)
1136 {
1137 #ifdef POLYMORPHIC
1138     vstrhq(base, value);
1139 #else /* POLYMORPHIC */
1140     vstrhq_u32(base, value);
1141 #endif /* POLYMORPHIC */
1142 }
1143 
1144 // CHECK-LABEL: @test_vstrhq_p_f16(
1145 // CHECK-NEXT:  entry:
1146 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <8 x half>*
1147 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1148 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1149 // CHECK-NEXT:    call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> [[VALUE:%.*]], <8 x half>* [[TMP0]], i32 2, <8 x i1> [[TMP2]])
1150 // CHECK-NEXT:    ret void
1151 //
// Predicated vstrhq_p on f16: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrhq_p_f16(float16_t * base,float16x8_t value,mve_pred16_t p)1152 void test_vstrhq_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p)
1153 {
1154 #ifdef POLYMORPHIC
1155     vstrhq_p(base, value, p);
1156 #else /* POLYMORPHIC */
1157     vstrhq_p_f16(base, value, p);
1158 #endif /* POLYMORPHIC */
1159 }
1160 
1161 // CHECK-LABEL: @test_vstrhq_p_s16(
1162 // CHECK-NEXT:  entry:
1163 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
1164 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1165 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1166 // CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]])
1167 // CHECK-NEXT:    ret void
1168 //
// Predicated vstrhq_p on s16: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrhq_p_s16(int16_t * base,int16x8_t value,mve_pred16_t p)1169 void test_vstrhq_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p)
1170 {
1171 #ifdef POLYMORPHIC
1172     vstrhq_p(base, value, p);
1173 #else /* POLYMORPHIC */
1174     vstrhq_p_s16(base, value, p);
1175 #endif /* POLYMORPHIC */
1176 }
1177 
1178 // CHECK-LABEL: @test_vstrhq_p_s32(
1179 // CHECK-NEXT:  entry:
1180 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16>
1181 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
1182 // CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
1183 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
1184 // CHECK-NEXT:    call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> [[TMP0]], <4 x i16>* [[TMP1]], i32 2, <4 x i1> [[TMP3]])
1185 // CHECK-NEXT:    ret void
1186 //
// Predicated narrowing vstrhq_p from s32: trunc to i16 lanes plus @llvm.masked.store (see CHECK lines above).
test_vstrhq_p_s32(int16_t * base,int32x4_t value,mve_pred16_t p)1187 void test_vstrhq_p_s32(int16_t *base, int32x4_t value, mve_pred16_t p)
1188 {
1189 #ifdef POLYMORPHIC
1190     vstrhq_p(base, value, p);
1191 #else /* POLYMORPHIC */
1192     vstrhq_p_s32(base, value, p);
1193 #endif /* POLYMORPHIC */
1194 }
1195 
1196 // CHECK-LABEL: @test_vstrhq_p_u16(
1197 // CHECK-NEXT:  entry:
1198 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <8 x i16>*
1199 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1200 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1201 // CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> [[VALUE:%.*]], <8 x i16>* [[TMP0]], i32 2, <8 x i1> [[TMP2]])
1202 // CHECK-NEXT:    ret void
1203 //
// Predicated vstrhq_p on u16: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrhq_p_u16(uint16_t * base,uint16x8_t value,mve_pred16_t p)1204 void test_vstrhq_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p)
1205 {
1206 #ifdef POLYMORPHIC
1207     vstrhq_p(base, value, p);
1208 #else /* POLYMORPHIC */
1209     vstrhq_p_u16(base, value, p);
1210 #endif /* POLYMORPHIC */
1211 }
1212 
1213 // CHECK-LABEL: @test_vstrhq_p_u32(
1214 // CHECK-NEXT:  entry:
1215 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16>
1216 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <4 x i16>*
1217 // CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
1218 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
1219 // CHECK-NEXT:    call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> [[TMP0]], <4 x i16>* [[TMP1]], i32 2, <4 x i1> [[TMP3]])
1220 // CHECK-NEXT:    ret void
1221 //
// Predicated narrowing vstrhq_p from u32: trunc to i16 lanes plus @llvm.masked.store (see CHECK lines above).
test_vstrhq_p_u32(uint16_t * base,uint32x4_t value,mve_pred16_t p)1222 void test_vstrhq_p_u32(uint16_t *base, uint32x4_t value, mve_pred16_t p)
1223 {
1224 #ifdef POLYMORPHIC
1225     vstrhq_p(base, value, p);
1226 #else /* POLYMORPHIC */
1227     vstrhq_p_u32(base, value, p);
1228 #endif /* POLYMORPHIC */
1229 }
1230 
1231 // CHECK-LABEL: @test_vstrwq_f32(
1232 // CHECK-NEXT:  entry:
1233 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
1234 // CHECK-NEXT:    store <4 x float> [[VALUE:%.*]], <4 x float>* [[TMP0]], align 4
1235 // CHECK-NEXT:    ret void
1236 //
// Unpredicated vstrwq on f32: must lower to a plain aligned <4 x float> store (see CHECK lines above).
test_vstrwq_f32(float32_t * base,float32x4_t value)1237 void test_vstrwq_f32(float32_t *base, float32x4_t value)
1238 {
1239 #ifdef POLYMORPHIC
1240     vstrwq(base, value);
1241 #else /* POLYMORPHIC */
1242     vstrwq_f32(base, value);
1243 #endif /* POLYMORPHIC */
1244 }
1245 
1246 // CHECK-LABEL: @test_vstrwq_s32(
1247 // CHECK-NEXT:  entry:
1248 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
1249 // CHECK-NEXT:    store <4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], align 4
1250 // CHECK-NEXT:    ret void
1251 //
// Unpredicated vstrwq on s32: must lower to a plain aligned <4 x i32> store (see CHECK lines above).
test_vstrwq_s32(int32_t * base,int32x4_t value)1252 void test_vstrwq_s32(int32_t *base, int32x4_t value)
1253 {
1254 #ifdef POLYMORPHIC
1255     vstrwq(base, value);
1256 #else /* POLYMORPHIC */
1257     vstrwq_s32(base, value);
1258 #endif /* POLYMORPHIC */
1259 }
1260 
1261 // CHECK-LABEL: @test_vstrwq_u32(
1262 // CHECK-NEXT:  entry:
1263 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
1264 // CHECK-NEXT:    store <4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], align 4
1265 // CHECK-NEXT:    ret void
1266 //
// Unpredicated vstrwq on u32: must lower to a plain aligned <4 x i32> store (see CHECK lines above).
test_vstrwq_u32(uint32_t * base,uint32x4_t value)1267 void test_vstrwq_u32(uint32_t *base, uint32x4_t value)
1268 {
1269 #ifdef POLYMORPHIC
1270     vstrwq(base, value);
1271 #else /* POLYMORPHIC */
1272     vstrwq_u32(base, value);
1273 #endif /* POLYMORPHIC */
1274 }
1275 
1276 // CHECK-LABEL: @test_vstrwq_p_f32(
1277 // CHECK-NEXT:  entry:
1278 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <4 x float>*
1279 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1280 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1281 // CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> [[VALUE:%.*]], <4 x float>* [[TMP0]], i32 4, <4 x i1> [[TMP2]])
1282 // CHECK-NEXT:    ret void
1283 //
// Predicated vstrwq_p on f32: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrwq_p_f32(float32_t * base,float32x4_t value,mve_pred16_t p)1284 void test_vstrwq_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p)
1285 {
1286 #ifdef POLYMORPHIC
1287     vstrwq_p(base, value, p);
1288 #else /* POLYMORPHIC */
1289     vstrwq_p_f32(base, value, p);
1290 #endif /* POLYMORPHIC */
1291 }
1292 
1293 // CHECK-LABEL: @test_vstrwq_p_s32(
1294 // CHECK-NEXT:  entry:
1295 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
1296 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1297 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1298 // CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]])
1299 // CHECK-NEXT:    ret void
1300 //
// Predicated vstrwq_p on s32: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrwq_p_s32(int32_t * base,int32x4_t value,mve_pred16_t p)1301 void test_vstrwq_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p)
1302 {
1303 #ifdef POLYMORPHIC
1304     vstrwq_p(base, value, p);
1305 #else /* POLYMORPHIC */
1306     vstrwq_p_s32(base, value, p);
1307 #endif /* POLYMORPHIC */
1308 }
1309 
1310 // CHECK-LABEL: @test_vstrwq_p_u32(
1311 // CHECK-NEXT:  entry:
1312 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <4 x i32>*
1313 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1314 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1315 // CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[VALUE:%.*]], <4 x i32>* [[TMP0]], i32 4, <4 x i1> [[TMP2]])
1316 // CHECK-NEXT:    ret void
1317 //
// Predicated vstrwq_p on u32: must lower to @llvm.masked.store with p converted via @llvm.arm.mve.pred.i2v (see CHECK lines above).
test_vstrwq_p_u32(uint32_t * base,uint32x4_t value,mve_pred16_t p)1318 void test_vstrwq_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p)
1319 {
1320 #ifdef POLYMORPHIC
1321     vstrwq_p(base, value, p);
1322 #else /* POLYMORPHIC */
1323     vstrwq_p_u32(base, value, p);
1324 #endif /* POLYMORPHIC */
1325 }
1326