// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

10 // CHECK-LABEL: @test_vadd_s8(
11 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
12 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_s8(int8x8_t v1,int8x8_t v2)13 int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
14 return vadd_s8(v1, v2);
15 }
16
17 // CHECK-LABEL: @test_vadd_s16(
18 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
19 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_s16(int16x4_t v1,int16x4_t v2)20 int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
21 return vadd_s16(v1, v2);
22 }
23
24 // CHECK-LABEL: @test_vadd_s32(
25 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
26 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_s32(int32x2_t v1,int32x2_t v2)27 int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
28 return vadd_s32(v1, v2);
29 }
30
31 // CHECK-LABEL: @test_vadd_s64(
32 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
33 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_s64(int64x1_t v1,int64x1_t v2)34 int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
35 return vadd_s64(v1, v2);
36 }
37
38 // CHECK-LABEL: @test_vadd_f32(
39 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
40 // CHECK: ret <2 x float> [[ADD_I]]
test_vadd_f32(float32x2_t v1,float32x2_t v2)41 float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
42 return vadd_f32(v1, v2);
43 }
44
45 // CHECK-LABEL: @test_vadd_u8(
46 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
47 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_u8(uint8x8_t v1,uint8x8_t v2)48 uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
49 return vadd_u8(v1, v2);
50 }
51
52 // CHECK-LABEL: @test_vadd_u16(
53 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
54 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_u16(uint16x4_t v1,uint16x4_t v2)55 uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
56 return vadd_u16(v1, v2);
57 }
58
59 // CHECK-LABEL: @test_vadd_u32(
60 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
61 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_u32(uint32x2_t v1,uint32x2_t v2)62 uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
63 return vadd_u32(v1, v2);
64 }
65
66 // CHECK-LABEL: @test_vadd_u64(
67 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
68 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_u64(uint64x1_t v1,uint64x1_t v2)69 uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
70 return vadd_u64(v1, v2);
71 }
72
73 // CHECK-LABEL: @test_vaddq_s8(
74 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
75 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_s8(int8x16_t v1,int8x16_t v2)76 int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
77 return vaddq_s8(v1, v2);
78 }
79
80 // CHECK-LABEL: @test_vaddq_s16(
81 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
82 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_s16(int16x8_t v1,int16x8_t v2)83 int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
84 return vaddq_s16(v1, v2);
85 }
86
87 // CHECK-LABEL: @test_vaddq_s32(
88 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
89 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_s32(int32x4_t v1,int32x4_t v2)90 int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
91 return vaddq_s32(v1, v2);
92 }
93
94 // CHECK-LABEL: @test_vaddq_s64(
95 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
96 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_s64(int64x2_t v1,int64x2_t v2)97 int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
98 return vaddq_s64(v1, v2);
99 }
100
101 // CHECK-LABEL: @test_vaddq_f32(
102 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
103 // CHECK: ret <4 x float> [[ADD_I]]
test_vaddq_f32(float32x4_t v1,float32x4_t v2)104 float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
105 return vaddq_f32(v1, v2);
106 }
107
108 // CHECK-LABEL: @test_vaddq_f64(
109 // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
110 // CHECK: ret <2 x double> [[ADD_I]]
test_vaddq_f64(float64x2_t v1,float64x2_t v2)111 float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
112 return vaddq_f64(v1, v2);
113 }
114
115 // CHECK-LABEL: @test_vaddq_u8(
116 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
117 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_u8(uint8x16_t v1,uint8x16_t v2)118 uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
119 return vaddq_u8(v1, v2);
120 }
121
122 // CHECK-LABEL: @test_vaddq_u16(
123 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
124 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_u16(uint16x8_t v1,uint16x8_t v2)125 uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
126 return vaddq_u16(v1, v2);
127 }
128
129 // CHECK-LABEL: @test_vaddq_u32(
130 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
131 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_u32(uint32x4_t v1,uint32x4_t v2)132 uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
133 return vaddq_u32(v1, v2);
134 }
135
136 // CHECK-LABEL: @test_vaddq_u64(
137 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
138 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_u64(uint64x2_t v1,uint64x2_t v2)139 uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
140 return vaddq_u64(v1, v2);
141 }
142
143 // CHECK-LABEL: @test_vsub_s8(
144 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
145 // CHECK: ret <8 x i8> [[SUB_I]]
test_vsub_s8(int8x8_t v1,int8x8_t v2)146 int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
147 return vsub_s8(v1, v2);
148 }
149
150 // CHECK-LABEL: @test_vsub_s16(
151 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
152 // CHECK: ret <4 x i16> [[SUB_I]]
test_vsub_s16(int16x4_t v1,int16x4_t v2)153 int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
154 return vsub_s16(v1, v2);
155 }
156
157 // CHECK-LABEL: @test_vsub_s32(
158 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
159 // CHECK: ret <2 x i32> [[SUB_I]]
test_vsub_s32(int32x2_t v1,int32x2_t v2)160 int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
161 return vsub_s32(v1, v2);
162 }
163
164 // CHECK-LABEL: @test_vsub_s64(
165 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
166 // CHECK: ret <1 x i64> [[SUB_I]]
test_vsub_s64(int64x1_t v1,int64x1_t v2)167 int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
168 return vsub_s64(v1, v2);
169 }
170
171 // CHECK-LABEL: @test_vsub_f32(
172 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
173 // CHECK: ret <2 x float> [[SUB_I]]
test_vsub_f32(float32x2_t v1,float32x2_t v2)174 float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
175 return vsub_f32(v1, v2);
176 }
177
178 // CHECK-LABEL: @test_vsub_u8(
179 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
180 // CHECK: ret <8 x i8> [[SUB_I]]
test_vsub_u8(uint8x8_t v1,uint8x8_t v2)181 uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
182 return vsub_u8(v1, v2);
183 }
184
185 // CHECK-LABEL: @test_vsub_u16(
186 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
187 // CHECK: ret <4 x i16> [[SUB_I]]
test_vsub_u16(uint16x4_t v1,uint16x4_t v2)188 uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
189 return vsub_u16(v1, v2);
190 }
191
192 // CHECK-LABEL: @test_vsub_u32(
193 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
194 // CHECK: ret <2 x i32> [[SUB_I]]
test_vsub_u32(uint32x2_t v1,uint32x2_t v2)195 uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
196 return vsub_u32(v1, v2);
197 }
198
199 // CHECK-LABEL: @test_vsub_u64(
200 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
201 // CHECK: ret <1 x i64> [[SUB_I]]
test_vsub_u64(uint64x1_t v1,uint64x1_t v2)202 uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
203 return vsub_u64(v1, v2);
204 }
205
206 // CHECK-LABEL: @test_vsubq_s8(
207 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
208 // CHECK: ret <16 x i8> [[SUB_I]]
test_vsubq_s8(int8x16_t v1,int8x16_t v2)209 int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
210 return vsubq_s8(v1, v2);
211 }
212
213 // CHECK-LABEL: @test_vsubq_s16(
214 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
215 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubq_s16(int16x8_t v1,int16x8_t v2)216 int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
217 return vsubq_s16(v1, v2);
218 }
219
220 // CHECK-LABEL: @test_vsubq_s32(
221 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
222 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubq_s32(int32x4_t v1,int32x4_t v2)223 int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
224 return vsubq_s32(v1, v2);
225 }
226
227 // CHECK-LABEL: @test_vsubq_s64(
228 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
229 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubq_s64(int64x2_t v1,int64x2_t v2)230 int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
231 return vsubq_s64(v1, v2);
232 }
233
234 // CHECK-LABEL: @test_vsubq_f32(
235 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
236 // CHECK: ret <4 x float> [[SUB_I]]
test_vsubq_f32(float32x4_t v1,float32x4_t v2)237 float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
238 return vsubq_f32(v1, v2);
239 }
240
241 // CHECK-LABEL: @test_vsubq_f64(
242 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
243 // CHECK: ret <2 x double> [[SUB_I]]
test_vsubq_f64(float64x2_t v1,float64x2_t v2)244 float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
245 return vsubq_f64(v1, v2);
246 }
247
248 // CHECK-LABEL: @test_vsubq_u8(
249 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
250 // CHECK: ret <16 x i8> [[SUB_I]]
test_vsubq_u8(uint8x16_t v1,uint8x16_t v2)251 uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
252 return vsubq_u8(v1, v2);
253 }
254
255 // CHECK-LABEL: @test_vsubq_u16(
256 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
257 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubq_u16(uint16x8_t v1,uint16x8_t v2)258 uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
259 return vsubq_u16(v1, v2);
260 }
261
262 // CHECK-LABEL: @test_vsubq_u32(
263 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
264 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubq_u32(uint32x4_t v1,uint32x4_t v2)265 uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
266 return vsubq_u32(v1, v2);
267 }
268
269 // CHECK-LABEL: @test_vsubq_u64(
270 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
271 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubq_u64(uint64x2_t v1,uint64x2_t v2)272 uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
273 return vsubq_u64(v1, v2);
274 }
275
276 // CHECK-LABEL: @test_vmul_s8(
277 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
278 // CHECK: ret <8 x i8> [[MUL_I]]
test_vmul_s8(int8x8_t v1,int8x8_t v2)279 int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
280 return vmul_s8(v1, v2);
281 }
282
283 // CHECK-LABEL: @test_vmul_s16(
284 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
285 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_s16(int16x4_t v1,int16x4_t v2)286 int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
287 return vmul_s16(v1, v2);
288 }
289
290 // CHECK-LABEL: @test_vmul_s32(
291 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
292 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_s32(int32x2_t v1,int32x2_t v2)293 int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
294 return vmul_s32(v1, v2);
295 }
296
297 // CHECK-LABEL: @test_vmul_f32(
298 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
299 // CHECK: ret <2 x float> [[MUL_I]]
test_vmul_f32(float32x2_t v1,float32x2_t v2)300 float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
301 return vmul_f32(v1, v2);
302 }
303
304 // CHECK-LABEL: @test_vmul_u8(
305 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
306 // CHECK: ret <8 x i8> [[MUL_I]]
test_vmul_u8(uint8x8_t v1,uint8x8_t v2)307 uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
308 return vmul_u8(v1, v2);
309 }
310
311 // CHECK-LABEL: @test_vmul_u16(
312 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
313 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_u16(uint16x4_t v1,uint16x4_t v2)314 uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
315 return vmul_u16(v1, v2);
316 }
317
318 // CHECK-LABEL: @test_vmul_u32(
319 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
320 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_u32(uint32x2_t v1,uint32x2_t v2)321 uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
322 return vmul_u32(v1, v2);
323 }
324
325 // CHECK-LABEL: @test_vmulq_s8(
326 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
327 // CHECK: ret <16 x i8> [[MUL_I]]
test_vmulq_s8(int8x16_t v1,int8x16_t v2)328 int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
329 return vmulq_s8(v1, v2);
330 }
331
332 // CHECK-LABEL: @test_vmulq_s16(
333 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
334 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_s16(int16x8_t v1,int16x8_t v2)335 int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
336 return vmulq_s16(v1, v2);
337 }
338
339 // CHECK-LABEL: @test_vmulq_s32(
340 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
341 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_s32(int32x4_t v1,int32x4_t v2)342 int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
343 return vmulq_s32(v1, v2);
344 }
345
346 // CHECK-LABEL: @test_vmulq_u8(
347 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
348 // CHECK: ret <16 x i8> [[MUL_I]]
test_vmulq_u8(uint8x16_t v1,uint8x16_t v2)349 uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
350 return vmulq_u8(v1, v2);
351 }
352
353 // CHECK-LABEL: @test_vmulq_u16(
354 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
355 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_u16(uint16x8_t v1,uint16x8_t v2)356 uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
357 return vmulq_u16(v1, v2);
358 }
359
360 // CHECK-LABEL: @test_vmulq_u32(
361 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
362 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_u32(uint32x4_t v1,uint32x4_t v2)363 uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
364 return vmulq_u32(v1, v2);
365 }
366
367 // CHECK-LABEL: @test_vmulq_f32(
368 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
369 // CHECK: ret <4 x float> [[MUL_I]]
test_vmulq_f32(float32x4_t v1,float32x4_t v2)370 float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
371 return vmulq_f32(v1, v2);
372 }
373
374 // CHECK-LABEL: @test_vmulq_f64(
375 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
376 // CHECK: ret <2 x double> [[MUL_I]]
test_vmulq_f64(float64x2_t v1,float64x2_t v2)377 float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
378 return vmulq_f64(v1, v2);
379 }
380
381 // CHECK-LABEL: @test_vmul_p8(
382 // CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
383 // CHECK: ret <8 x i8> [[VMUL_V_I]]
test_vmul_p8(poly8x8_t v1,poly8x8_t v2)384 poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
385 return vmul_p8(v1, v2);
386 }
387
388 // CHECK-LABEL: @test_vmulq_p8(
389 // CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
390 // CHECK: ret <16 x i8> [[VMULQ_V_I]]
test_vmulq_p8(poly8x16_t v1,poly8x16_t v2)391 poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
392 return vmulq_p8(v1, v2);
393 }
394
395 // CHECK-LABEL: @test_vmla_s8(
396 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
397 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
398 // CHECK: ret <8 x i8> [[ADD_I]]
test_vmla_s8(int8x8_t v1,int8x8_t v2,int8x8_t v3)399 int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
400 return vmla_s8(v1, v2, v3);
401 }
402
403 // CHECK-LABEL: @test_vmla_s16(
404 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
405 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
406 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
407 // CHECK: ret <8 x i8> [[TMP0]]
test_vmla_s16(int16x4_t v1,int16x4_t v2,int16x4_t v3)408 int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
409 return vmla_s16(v1, v2, v3);
410 }
411
412 // CHECK-LABEL: @test_vmla_s32(
413 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
414 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
415 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_s32(int32x2_t v1,int32x2_t v2,int32x2_t v3)416 int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
417 return vmla_s32(v1, v2, v3);
418 }
419
420 // CHECK-LABEL: @test_vmla_f32(
421 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
422 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
423 // CHECK: ret <2 x float> [[ADD_I]]
test_vmla_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)424 float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
425 return vmla_f32(v1, v2, v3);
426 }
427
428 // CHECK-LABEL: @test_vmla_u8(
429 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
430 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
431 // CHECK: ret <8 x i8> [[ADD_I]]
test_vmla_u8(uint8x8_t v1,uint8x8_t v2,uint8x8_t v3)432 uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
433 return vmla_u8(v1, v2, v3);
434 }
435
436 // CHECK-LABEL: @test_vmla_u16(
437 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
438 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
439 // CHECK: ret <4 x i16> [[ADD_I]]
test_vmla_u16(uint16x4_t v1,uint16x4_t v2,uint16x4_t v3)440 uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
441 return vmla_u16(v1, v2, v3);
442 }
443
444 // CHECK-LABEL: @test_vmla_u32(
445 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
446 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
447 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)448 uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
449 return vmla_u32(v1, v2, v3);
450 }
451
452 // CHECK-LABEL: @test_vmlaq_s8(
453 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
454 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
455 // CHECK: ret <16 x i8> [[ADD_I]]
test_vmlaq_s8(int8x16_t v1,int8x16_t v2,int8x16_t v3)456 int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
457 return vmlaq_s8(v1, v2, v3);
458 }
459
460 // CHECK-LABEL: @test_vmlaq_s16(
461 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
462 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
463 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_s16(int16x8_t v1,int16x8_t v2,int16x8_t v3)464 int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
465 return vmlaq_s16(v1, v2, v3);
466 }
467
468 // CHECK-LABEL: @test_vmlaq_s32(
469 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
470 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
471 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_s32(int32x4_t v1,int32x4_t v2,int32x4_t v3)472 int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
473 return vmlaq_s32(v1, v2, v3);
474 }
475
476 // CHECK-LABEL: @test_vmlaq_f32(
477 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
478 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
479 // CHECK: ret <4 x float> [[ADD_I]]
test_vmlaq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)480 float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
481 return vmlaq_f32(v1, v2, v3);
482 }
483
484 // CHECK-LABEL: @test_vmlaq_u8(
485 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
486 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
487 // CHECK: ret <16 x i8> [[ADD_I]]
test_vmlaq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)488 uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
489 return vmlaq_u8(v1, v2, v3);
490 }
491
492 // CHECK-LABEL: @test_vmlaq_u16(
493 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
494 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
495 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)496 uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
497 return vmlaq_u16(v1, v2, v3);
498 }
499
500 // CHECK-LABEL: @test_vmlaq_u32(
501 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
502 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
503 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_u32(uint32x4_t v1,uint32x4_t v2,uint32x4_t v3)504 uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
505 return vmlaq_u32(v1, v2, v3);
506 }
507
508 // CHECK-LABEL: @test_vmlaq_f64(
509 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
510 // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
511 // CHECK: ret <2 x double> [[ADD_I]]
test_vmlaq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)512 float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
513 return vmlaq_f64(v1, v2, v3);
514 }
515
516 // CHECK-LABEL: @test_vmls_s8(
517 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
518 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
519 // CHECK: ret <8 x i8> [[SUB_I]]
test_vmls_s8(int8x8_t v1,int8x8_t v2,int8x8_t v3)520 int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
521 return vmls_s8(v1, v2, v3);
522 }
523
524 // CHECK-LABEL: @test_vmls_s16(
525 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
526 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
527 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
528 // CHECK: ret <8 x i8> [[TMP0]]
test_vmls_s16(int16x4_t v1,int16x4_t v2,int16x4_t v3)529 int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
530 return vmls_s16(v1, v2, v3);
531 }
532
533 // CHECK-LABEL: @test_vmls_s32(
534 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
535 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
536 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_s32(int32x2_t v1,int32x2_t v2,int32x2_t v3)537 int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
538 return vmls_s32(v1, v2, v3);
539 }
540
541 // CHECK-LABEL: @test_vmls_f32(
542 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
543 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
544 // CHECK: ret <2 x float> [[SUB_I]]
test_vmls_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)545 float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
546 return vmls_f32(v1, v2, v3);
547 }
548
549 // CHECK-LABEL: @test_vmls_u8(
550 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
551 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
552 // CHECK: ret <8 x i8> [[SUB_I]]
test_vmls_u8(uint8x8_t v1,uint8x8_t v2,uint8x8_t v3)553 uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
554 return vmls_u8(v1, v2, v3);
555 }
556
557 // CHECK-LABEL: @test_vmls_u16(
558 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
559 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
560 // CHECK: ret <4 x i16> [[SUB_I]]
test_vmls_u16(uint16x4_t v1,uint16x4_t v2,uint16x4_t v3)561 uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
562 return vmls_u16(v1, v2, v3);
563 }
564
565 // CHECK-LABEL: @test_vmls_u32(
566 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
567 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
568 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)569 uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
570 return vmls_u32(v1, v2, v3);
571 }
572
573 // CHECK-LABEL: @test_vmlsq_s8(
574 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
575 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
576 // CHECK: ret <16 x i8> [[SUB_I]]
test_vmlsq_s8(int8x16_t v1,int8x16_t v2,int8x16_t v3)577 int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
578 return vmlsq_s8(v1, v2, v3);
579 }
580
581 // CHECK-LABEL: @test_vmlsq_s16(
582 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
583 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
584 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_s16(int16x8_t v1,int16x8_t v2,int16x8_t v3)585 int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
586 return vmlsq_s16(v1, v2, v3);
587 }
588
589 // CHECK-LABEL: @test_vmlsq_s32(
590 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
591 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
592 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_s32(int32x4_t v1,int32x4_t v2,int32x4_t v3)593 int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
594 return vmlsq_s32(v1, v2, v3);
595 }
596
597 // CHECK-LABEL: @test_vmlsq_f32(
598 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
599 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
600 // CHECK: ret <4 x float> [[SUB_I]]
test_vmlsq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)601 float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
602 return vmlsq_f32(v1, v2, v3);
603 }
604
605 // CHECK-LABEL: @test_vmlsq_u8(
606 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
607 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
608 // CHECK: ret <16 x i8> [[SUB_I]]
test_vmlsq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)609 uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
610 return vmlsq_u8(v1, v2, v3);
611 }
612
613 // CHECK-LABEL: @test_vmlsq_u16(
614 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
615 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
616 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)617 uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
618 return vmlsq_u16(v1, v2, v3);
619 }
620
621 // CHECK-LABEL: @test_vmlsq_u32(
622 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
623 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
624 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_u32(uint32x4_t v1,uint32x4_t v2,uint32x4_t v3)625 uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
626 return vmlsq_u32(v1, v2, v3);
627 }
628
629 // CHECK-LABEL: @test_vmlsq_f64(
630 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
631 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
632 // CHECK: ret <2 x double> [[SUB_I]]
test_vmlsq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)633 float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
634 return vmlsq_f64(v1, v2, v3);
635 }
636
637 // CHECK-LABEL: @test_vfma_f32(
638 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
639 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
640 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
641 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
642 // CHECK: ret <2 x float> [[TMP3]]
test_vfma_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)643 float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
644 return vfma_f32(v1, v2, v3);
645 }
646
647 // CHECK-LABEL: @test_vfmaq_f32(
648 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
649 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
650 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
651 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
652 // CHECK: ret <4 x float> [[TMP3]]
test_vfmaq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)653 float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
654 return vfmaq_f32(v1, v2, v3);
655 }
656
657 // CHECK-LABEL: @test_vfmaq_f64(
658 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
659 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
660 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
661 // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
662 // CHECK: ret <2 x double> [[TMP3]]
test_vfmaq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)663 float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
664 return vfmaq_f64(v1, v2, v3);
665 }
666
667 // CHECK-LABEL: @test_vfms_f32(
668 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
669 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
670 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
671 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
672 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
673 // CHECK: ret <2 x float> [[TMP3]]
test_vfms_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)674 float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
675 return vfms_f32(v1, v2, v3);
676 }
677
678 // CHECK-LABEL: @test_vfmsq_f32(
679 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
680 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
681 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
682 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
683 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
684 // CHECK: ret <4 x float> [[TMP3]]
test_vfmsq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)685 float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
686 return vfmsq_f32(v1, v2, v3);
687 }
688
689 // CHECK-LABEL: @test_vfmsq_f64(
690 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
691 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
692 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
693 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
694 // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
695 // CHECK: ret <2 x double> [[TMP3]]
test_vfmsq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)696 float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
697 return vfmsq_f64(v1, v2, v3);
698 }
699
700 // CHECK-LABEL: @test_vdivq_f64(
701 // CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
702 // CHECK: ret <2 x double> [[DIV_I]]
test_vdivq_f64(float64x2_t v1,float64x2_t v2)703 float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
704 return vdivq_f64(v1, v2);
705 }
706
707 // CHECK-LABEL: @test_vdivq_f32(
708 // CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
709 // CHECK: ret <4 x float> [[DIV_I]]
test_vdivq_f32(float32x4_t v1,float32x4_t v2)710 float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
711 return vdivq_f32(v1, v2);
712 }
713
714 // CHECK-LABEL: @test_vdiv_f32(
715 // CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
716 // CHECK: ret <2 x float> [[DIV_I]]
test_vdiv_f32(float32x2_t v1,float32x2_t v2)717 float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
718 return vdiv_f32(v1, v2);
719 }
720
721 // CHECK-LABEL: @test_vaba_s8(
// vaba[q]: absolute-difference-and-accumulate. Each test asserts the
// s/uabd intrinsic call on (v2, v3) followed by a plain `add` with v1.
// The bitcast TMP lines for 16/32-bit lanes are argument-marshalling
// emitted by clang and are captured but intentionally unused.
722 // CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
723 // CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
724 // CHECK:   ret <8 x i8> [[ADD_I]]
test_vaba_s8(int8x8_t v1,int8x8_t v2,int8x8_t v3)725 int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
726   return vaba_s8(v1, v2, v3);
727 }
728 
729 // CHECK-LABEL: @test_vaba_s16(
730 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
731 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
732 // CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
733 // CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
734 // CHECK:   ret <4 x i16> [[ADD_I]]
test_vaba_s16(int16x4_t v1,int16x4_t v2,int16x4_t v3)735 int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
736   return vaba_s16(v1, v2, v3);
737 }
738 
739 // CHECK-LABEL: @test_vaba_s32(
740 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
741 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
742 // CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
743 // CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
744 // CHECK:   ret <2 x i32> [[ADD_I]]
test_vaba_s32(int32x2_t v1,int32x2_t v2,int32x2_t v3)745 int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
746   return vaba_s32(v1, v2, v3);
747 }
748 
749 // CHECK-LABEL: @test_vaba_u8(
750 // CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
751 // CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
752 // CHECK:   ret <8 x i8> [[ADD_I]]
test_vaba_u8(uint8x8_t v1,uint8x8_t v2,uint8x8_t v3)753 uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
754   return vaba_u8(v1, v2, v3);
755 }
756 
757 // CHECK-LABEL: @test_vaba_u16(
758 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
759 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
760 // CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
761 // CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
762 // CHECK:   ret <4 x i16> [[ADD_I]]
test_vaba_u16(uint16x4_t v1,uint16x4_t v2,uint16x4_t v3)763 uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
764   return vaba_u16(v1, v2, v3);
765 }
766 
767 // CHECK-LABEL: @test_vaba_u32(
768 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
769 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
770 // CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
771 // CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
772 // CHECK:   ret <2 x i32> [[ADD_I]]
test_vaba_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)773 uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
774   return vaba_u32(v1, v2, v3);
775 }
776 
777 // CHECK-LABEL: @test_vabaq_s8(
// vabaq: 128-bit (quad) variants of the same accumulate pattern.
778 // CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
779 // CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
780 // CHECK:   ret <16 x i8> [[ADD_I]]
test_vabaq_s8(int8x16_t v1,int8x16_t v2,int8x16_t v3)781 int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
782   return vabaq_s8(v1, v2, v3);
783 }
784 
785 // CHECK-LABEL: @test_vabaq_s16(
786 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
787 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
788 // CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
789 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
790 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vabaq_s16(int16x8_t v1,int16x8_t v2,int16x8_t v3)791 int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
792   return vabaq_s16(v1, v2, v3);
793 }
794 
795 // CHECK-LABEL: @test_vabaq_s32(
796 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
797 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
798 // CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
799 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
800 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vabaq_s32(int32x4_t v1,int32x4_t v2,int32x4_t v3)801 int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
802   return vabaq_s32(v1, v2, v3);
803 }
804 
805 // CHECK-LABEL: @test_vabaq_u8(
806 // CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
807 // CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
808 // CHECK:   ret <16 x i8> [[ADD_I]]
test_vabaq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)809 uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
810   return vabaq_u8(v1, v2, v3);
811 }
812 
813 // CHECK-LABEL: @test_vabaq_u16(
814 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
815 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
816 // CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
817 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
818 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vabaq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)819 uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
820   return vabaq_u16(v1, v2, v3);
821 }
822 
823 // CHECK-LABEL: @test_vabaq_u32(
824 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
825 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
826 // CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
827 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
828 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vabaq_u32(uint32x4_t v1,uint32x4_t v2,uint32x4_t v3)829 uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
830   return vabaq_u32(v1, v2, v3);
831 }
832
833 // CHECK-LABEL: @test_vabd_s8(
// vabd[q]: absolute difference. Integer lanes map to the s/uabd
// intrinsics; float lanes map to fabd. The bitcast TMP captures are
// clang's argument marshalling and are not referenced by later CHECKs.
834 // CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
835 // CHECK:   ret <8 x i8> [[VABD_I]]
test_vabd_s8(int8x8_t v1,int8x8_t v2)836 int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
837   return vabd_s8(v1, v2);
838 }
839 
840 // CHECK-LABEL: @test_vabd_s16(
841 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
842 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
843 // CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
844 // CHECK:   ret <4 x i16> [[VABD2_I]]
test_vabd_s16(int16x4_t v1,int16x4_t v2)845 int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
846   return vabd_s16(v1, v2);
847 }
848 
849 // CHECK-LABEL: @test_vabd_s32(
850 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
851 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
852 // CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
853 // CHECK:   ret <2 x i32> [[VABD2_I]]
test_vabd_s32(int32x2_t v1,int32x2_t v2)854 int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
855   return vabd_s32(v1, v2);
856 }
857 
858 // CHECK-LABEL: @test_vabd_u8(
859 // CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
860 // CHECK:   ret <8 x i8> [[VABD_I]]
test_vabd_u8(uint8x8_t v1,uint8x8_t v2)861 uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
862   return vabd_u8(v1, v2);
863 }
864 
865 // CHECK-LABEL: @test_vabd_u16(
866 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
867 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
868 // CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
869 // CHECK:   ret <4 x i16> [[VABD2_I]]
test_vabd_u16(uint16x4_t v1,uint16x4_t v2)870 uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
871   return vabd_u16(v1, v2);
872 }
873 
874 // CHECK-LABEL: @test_vabd_u32(
875 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
876 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
877 // CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
878 // CHECK:   ret <2 x i32> [[VABD2_I]]
test_vabd_u32(uint32x2_t v1,uint32x2_t v2)879 uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
880   return vabd_u32(v1, v2);
881 }
882 
883 // CHECK-LABEL: @test_vabd_f32(
884 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
885 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
886 // CHECK:   [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
887 // CHECK:   ret <2 x float> [[VABD2_I]]
test_vabd_f32(float32x2_t v1,float32x2_t v2)888 float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
889   return vabd_f32(v1, v2);
890 }
891 
892 // CHECK-LABEL: @test_vabdq_s8(
// vabdq: 128-bit (quad) variants, including the f64 lane type that is
// only available on AArch64.
893 // CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
894 // CHECK:   ret <16 x i8> [[VABD_I]]
test_vabdq_s8(int8x16_t v1,int8x16_t v2)895 int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
896   return vabdq_s8(v1, v2);
897 }
898 
899 // CHECK-LABEL: @test_vabdq_s16(
900 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
901 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
902 // CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
903 // CHECK:   ret <8 x i16> [[VABD2_I]]
test_vabdq_s16(int16x8_t v1,int16x8_t v2)904 int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
905   return vabdq_s16(v1, v2);
906 }
907 
908 // CHECK-LABEL: @test_vabdq_s32(
909 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
910 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
911 // CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
912 // CHECK:   ret <4 x i32> [[VABD2_I]]
test_vabdq_s32(int32x4_t v1,int32x4_t v2)913 int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
914   return vabdq_s32(v1, v2);
915 }
916 
917 // CHECK-LABEL: @test_vabdq_u8(
918 // CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
919 // CHECK:   ret <16 x i8> [[VABD_I]]
test_vabdq_u8(uint8x16_t v1,uint8x16_t v2)920 uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
921   return vabdq_u8(v1, v2);
922 }
923 
924 // CHECK-LABEL: @test_vabdq_u16(
925 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
926 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
927 // CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
928 // CHECK:   ret <8 x i16> [[VABD2_I]]
test_vabdq_u16(uint16x8_t v1,uint16x8_t v2)929 uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
930   return vabdq_u16(v1, v2);
931 }
932 
933 // CHECK-LABEL: @test_vabdq_u32(
934 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
935 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
936 // CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
937 // CHECK:   ret <4 x i32> [[VABD2_I]]
test_vabdq_u32(uint32x4_t v1,uint32x4_t v2)938 uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
939   return vabdq_u32(v1, v2);
940 }
941 
942 // CHECK-LABEL: @test_vabdq_f32(
943 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
944 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
945 // CHECK:   [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
946 // CHECK:   ret <4 x float> [[VABD2_I]]
test_vabdq_f32(float32x4_t v1,float32x4_t v2)947 float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
948   return vabdq_f32(v1, v2);
949 }
950 
951 // CHECK-LABEL: @test_vabdq_f64(
952 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
953 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
954 // CHECK:   [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
955 // CHECK:   ret <2 x double> [[VABD2_I]]
test_vabdq_f64(float64x2_t v1,float64x2_t v2)956 float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
957   return vabdq_f64(v1, v2);
958 }
959
960 // CHECK-LABEL: @test_vbsl_s8(
// vbsl (64-bit): bitwise select. clang open-codes it as
//   (mask & v2) | (~mask & v3)
// so every test asserts the and / xor-with-all-ones / and / or sequence
// rather than an intrinsic call. Float variants additionally bitcast
// through the matching integer vector type.
961 // CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
962 // CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
963 // CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
964 // CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
965 // CHECK:   ret <8 x i8> [[VBSL2_I]]
test_vbsl_s8(uint8x8_t v1,int8x8_t v2,int8x8_t v3)966 int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
967   return vbsl_s8(v1, v2, v3);
968 }
969 
970 // CHECK-LABEL: @test_vbsl_s16(
971 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
972 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
973 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
974 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
975 // CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
976 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
977 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
978 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
979 // CHECK:   ret <8 x i8> [[TMP4]]
// NOTE(review): declared to return int8x8_t although vbsl_s16 yields
// int16x4_t; this relies on clang's default lax vector conversion, and
// the CHECKs above deliberately expect the extra <8 x i8> bitcast on
// return — presumably intentional, but confirm before "fixing".
test_vbsl_s16(uint16x4_t v1,int16x4_t v2,int16x4_t v3)980 int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
981   return vbsl_s16(v1, v2, v3);
982 }
983 
984 // CHECK-LABEL: @test_vbsl_s32(
985 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
986 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
987 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
988 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
989 // CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
990 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
991 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
992 // CHECK:   ret <2 x i32> [[VBSL5_I]]
test_vbsl_s32(uint32x2_t v1,int32x2_t v2,int32x2_t v3)993 int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
994   return vbsl_s32(v1, v2, v3);
995 }
996 
997 // CHECK-LABEL: @test_vbsl_s64(
998 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
999 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1000 // CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1001 // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
1002 // CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
1003 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
1004 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1005 // CHECK:   ret <1 x i64> [[VBSL5_I]]
// NOTE(review): takes/returns uint64x1_t despite exercising vbsl_s64;
// the generated IR is type-identical, but the signed types would match
// the sibling tests — verify against upstream before changing.
test_vbsl_s64(uint64x1_t v1,uint64x1_t v2,uint64x1_t v3)1006 uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
1007   return vbsl_s64(v1, v2, v3);
1008 }
1009 
1010 // CHECK-LABEL: @test_vbsl_u8(
1011 // CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1012 // CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1013 // CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1014 // CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1015 // CHECK:   ret <8 x i8> [[VBSL2_I]]
test_vbsl_u8(uint8x8_t v1,uint8x8_t v2,uint8x8_t v3)1016 uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
1017   return vbsl_u8(v1, v2, v3);
1018 }
1019 
1020 // CHECK-LABEL: @test_vbsl_u16(
1021 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1022 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1023 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1024 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
1025 // CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
1026 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
1027 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1028 // CHECK:   ret <4 x i16> [[VBSL5_I]]
test_vbsl_u16(uint16x4_t v1,uint16x4_t v2,uint16x4_t v3)1029 uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
1030   return vbsl_u16(v1, v2, v3);
1031 }
1032 
1033 // CHECK-LABEL: @test_vbsl_u32(
1034 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1035 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1036 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
1037 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
1038 // CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
1039 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
1040 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1041 // CHECK:   ret <2 x i32> [[VBSL5_I]]
test_vbsl_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)1042 uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
1043   return vbsl_u32(v1, v2, v3);
1044 }
1045 
1046 // CHECK-LABEL: @test_vbsl_u64(
1047 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1048 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1049 // CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1050 // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
1051 // CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
1052 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
1053 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1054 // CHECK:   ret <1 x i64> [[VBSL5_I]]
test_vbsl_u64(uint64x1_t v1,uint64x1_t v2,uint64x1_t v3)1055 uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
1056   return vbsl_u64(v1, v2, v3);
1057 }
1058 
1059 // CHECK-LABEL: @test_vbsl_f32(
1060 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
1061 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
1062 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1063 // CHECK:   [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
1064 // CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1065 // CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
1066 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[TMP0]], [[VBSL1_I]]
1067 // CHECK:   [[TMP4:%.*]] = xor <2 x i32> [[TMP0]], <i32 -1, i32 -1>
1068 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
1069 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1070 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
1071 // CHECK:   ret <2 x float> [[TMP5]]
test_vbsl_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)1072 float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
1073   return vbsl_f32(v1, v2, v3);
1074 }
1075 
1076 // CHECK-LABEL: @test_vbsl_f64(
1077 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1078 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
1079 // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
1080 // CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1081 // CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1082 // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
1083 // CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
1084 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1085 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1086 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
1087 // CHECK:   ret <1 x double> [[TMP4]]
test_vbsl_f64(uint64x1_t v1,float64x1_t v2,float64x1_t v3)1088 float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
1089   return vbsl_f64(v1, v2, v3);
1090 }
1091 
1092 // CHECK-LABEL: @test_vbsl_p8(
1093 // CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1094 // CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1095 // CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1096 // CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1097 // CHECK:   ret <8 x i8> [[VBSL2_I]]
test_vbsl_p8(uint8x8_t v1,poly8x8_t v2,poly8x8_t v3)1098 poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
1099   return vbsl_p8(v1, v2, v3);
1100 }
1101 
1102 // CHECK-LABEL: @test_vbsl_p16(
1103 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1104 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1105 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1106 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
1107 // CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
1108 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
1109 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1110 // CHECK:   ret <4 x i16> [[VBSL5_I]]
test_vbsl_p16(uint16x4_t v1,poly16x4_t v2,poly16x4_t v3)1111 poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
1112   return vbsl_p16(v1, v2, v3);
1113 }
1114
1115 // CHECK-LABEL: @test_vbslq_s8(
// vbslq (128-bit): same open-coded (mask & v2) | (~mask & v3) select
// pattern as the 64-bit vbsl tests, on quad registers.
1116 // CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1117 // CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1118 // CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1119 // CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1120 // CHECK:   ret <16 x i8> [[VBSL2_I]]
test_vbslq_s8(uint8x16_t v1,int8x16_t v2,int8x16_t v3)1121 int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
1122   return vbslq_s8(v1, v2, v3);
1123 }
1124 
1125 // CHECK-LABEL: @test_vbslq_s16(
1126 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1127 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1128 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1129 // CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
1130 // CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1131 // CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
1132 // CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1133 // CHECK:   ret <8 x i16> [[VBSL5_I]]
test_vbslq_s16(uint16x8_t v1,int16x8_t v2,int16x8_t v3)1134 int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
1135   return vbslq_s16(v1, v2, v3);
1136 }
1137 
1138 // CHECK-LABEL: @test_vbslq_s32(
1139 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1140 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1141 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
1142 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
1143 // CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
1144 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
1145 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1146 // CHECK:   ret <4 x i32> [[VBSL5_I]]
test_vbslq_s32(uint32x4_t v1,int32x4_t v2,int32x4_t v3)1147 int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
1148   return vbslq_s32(v1, v2, v3);
1149 }
1150 
1151 // CHECK-LABEL: @test_vbslq_s64(
1152 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1153 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1154 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
1155 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
1156 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1157 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
1158 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1159 // CHECK:   ret <2 x i64> [[VBSL5_I]]
test_vbslq_s64(uint64x2_t v1,int64x2_t v2,int64x2_t v3)1160 int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
1161   return vbslq_s64(v1, v2, v3);
1162 }
1163 
1164 // CHECK-LABEL: @test_vbslq_u8(
1165 // CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1166 // CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1167 // CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1168 // CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1169 // CHECK:   ret <16 x i8> [[VBSL2_I]]
test_vbslq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)1170 uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
1171   return vbslq_u8(v1, v2, v3);
1172 }
1173 
1174 // CHECK-LABEL: @test_vbslq_u16(
1175 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1176 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1177 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1178 // CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
1179 // CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1180 // CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
1181 // CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1182 // CHECK:   ret <8 x i16> [[VBSL5_I]]
test_vbslq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)1183 uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
1184   return vbslq_u16(v1, v2, v3);
1185 }
1186
1187 // CHECK-LABEL: @test_vbslq_u32(
1188 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1189 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1190 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
1191 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
1192 // CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
1193 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
1194 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1195 // CHECK:   ret <4 x i32> [[VBSL5_I]]
// Fixed: this test previously used int32x4_t operands and called
// vbslq_s32, so vbslq_u32 was never exercised (vbslq_s32 already has
// its own test above). The generated IR is identical (<4 x i32>
// and/xor/or select), so the CHECK lines above still match.
test_vbslq_u32(uint32x4_t v1,uint32x4_t v2,uint32x4_t v3)1196 uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
1197   return vbslq_u32(v1, v2, v3);
1198 }
1199
1200 // CHECK-LABEL: @test_vbslq_u64(
// Remaining vbslq variants (u64, f32, p8, p16, f64); float lanes
// bitcast through the same-width integer vector around the select.
1201 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1202 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1203 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
1204 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
1205 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1206 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
1207 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1208 // CHECK:   ret <2 x i64> [[VBSL5_I]]
test_vbslq_u64(uint64x2_t v1,uint64x2_t v2,uint64x2_t v3)1209 uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
1210   return vbslq_u64(v1, v2, v3);
1211 }
1212 
1213 // CHECK-LABEL: @test_vbslq_f32(
1214 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1215 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1216 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
1217 // CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1218 // CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1219 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
1220 // CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
1221 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
1222 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1223 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
1224 // CHECK:   ret <4 x float> [[TMP4]]
test_vbslq_f32(uint32x4_t v1,float32x4_t v2,float32x4_t v3)1225 float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
1226   return vbslq_f32(v1, v2, v3);
1227 }
1228 
1229 // CHECK-LABEL: @test_vbslq_p8(
1230 // CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1231 // CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1232 // CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1233 // CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1234 // CHECK:   ret <16 x i8> [[VBSL2_I]]
test_vbslq_p8(uint8x16_t v1,poly8x16_t v2,poly8x16_t v3)1235 poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
1236   return vbslq_p8(v1, v2, v3);
1237 }
1238 
1239 // CHECK-LABEL: @test_vbslq_p16(
1240 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1241 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1242 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1243 // CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
1244 // CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1245 // CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
1246 // CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1247 // CHECK:   ret <8 x i16> [[VBSL5_I]]
test_vbslq_p16(uint16x8_t v1,poly16x8_t v2,poly16x8_t v3)1248 poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
1249   return vbslq_p16(v1, v2, v3);
1250 }
1251 
1252 // CHECK-LABEL: @test_vbslq_f64(
1253 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1254 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1255 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
1256 // CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1257 // CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
1258 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
1259 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1260 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
1261 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1262 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
1263 // CHECK:   ret <2 x double> [[TMP4]]
test_vbslq_f64(uint64x2_t v1,float64x2_t v2,float64x2_t v3)1264 float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
1265   return vbslq_f64(v1, v2, v3);
1266 }
1267
1268 // CHECK-LABEL: @test_vrecps_f32(
// vrecps / vrsqrts: Newton-Raphson reciprocal(-sqrt) step intrinsics,
// lowered to the frecps/frsqrts LLVM intrinsics. The *_V3_I bitcast is
// captured by the CHECKs but intentionally unreferenced afterwards —
// the ret uses the pre-bitcast value directly.
1269 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1270 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1271 // CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
1272 // CHECK:   ret <2 x float> [[VRECPS_V2_I]]
test_vrecps_f32(float32x2_t v1,float32x2_t v2)1273 float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
1274   return vrecps_f32(v1, v2);
1275 }
1276 
1277 // CHECK-LABEL: @test_vrecpsq_f32(
1278 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1279 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1280 // CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
1281 // CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
1282 // CHECK:   ret <4 x float> [[VRECPSQ_V2_I]]
test_vrecpsq_f32(float32x4_t v1,float32x4_t v2)1283 float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
1284   return vrecpsq_f32(v1, v2);
1285 }
1286 
1287 // CHECK-LABEL: @test_vrecpsq_f64(
1288 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1289 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1290 // CHECK:   [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
1291 // CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
1292 // CHECK:   ret <2 x double> [[VRECPSQ_V2_I]]
test_vrecpsq_f64(float64x2_t v1,float64x2_t v2)1293 float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
1294   return vrecpsq_f64(v1, v2);
1295 }
1296 
1297 // CHECK-LABEL: @test_vrsqrts_f32(
1298 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1299 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1300 // CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
1301 // CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
1302 // CHECK:   ret <2 x float> [[VRSQRTS_V2_I]]
test_vrsqrts_f32(float32x2_t v1,float32x2_t v2)1303 float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
1304   return vrsqrts_f32(v1, v2);
1305 }
1306 
1307 // CHECK-LABEL: @test_vrsqrtsq_f32(
1308 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1309 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1310 // CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
1311 // CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
1312 // CHECK:   ret <4 x float> [[VRSQRTSQ_V2_I]]
test_vrsqrtsq_f32(float32x4_t v1,float32x4_t v2)1313 float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
1314   return vrsqrtsq_f32(v1, v2);
1315 }
1316 
1317 // CHECK-LABEL: @test_vrsqrtsq_f64(
1318 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1319 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1320 // CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
1321 // CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
1322 // CHECK:   ret <2 x double> [[VRSQRTSQ_V2_I]]
test_vrsqrtsq_f64(float64x2_t v1,float64x2_t v2)1323 float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
1324   return vrsqrtsq_f64(v1, v2);
1325 }
1326
1327 // CHECK-LABEL: @test_vcage_f32(
// vcage[q]: absolute compare greater-than-or-equal (|a| >= |b|),
// lowered to the facge intrinsic returning an all-ones/all-zeros
// integer mask of the same lane width. Note these two use %a/%b
// parameter names while most of the file uses %v1/%v2.
1328 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1329 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1330 // CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1331 // CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
test_vcage_f32(float32x2_t v1,float32x2_t v2)1332 uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
1333   return vcage_f32(v1, v2);
1334 }
1335 
1336 // CHECK-LABEL: @test_vcage_f64(
1337 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1338 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1339 // CHECK:   [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1340 // CHECK:   ret <1 x i64> [[VCAGE_V2_I]]
test_vcage_f64(float64x1_t a,float64x1_t b)1341 uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
1342   return vcage_f64(a, b);
1343 }
1344 
1345 // CHECK-LABEL: @test_vcageq_f32(
1346 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1347 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1348 // CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1349 // CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
test_vcageq_f32(float32x4_t v1,float32x4_t v2)1350 uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
1351   return vcageq_f32(v1, v2);
1352 }
1353 
1354 // CHECK-LABEL: @test_vcageq_f64(
1355 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1356 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1357 // CHECK:   [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1358 // CHECK:   ret <2 x i64> [[VCAGEQ_V2_I]]
test_vcageq_f64(float64x2_t v1,float64x2_t v2)1359 uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
1360   return vcageq_f64(v1, v2);
1361 }
1362
1363 // CHECK-LABEL: @test_vcagt_f32(
1364 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1365 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1366 // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1367 // CHECK: ret <2 x i32> [[VCAGT_V2_I]]
test_vcagt_f32(float32x2_t v1,float32x2_t v2)1368 uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
1369 return vcagt_f32(v1, v2);
1370 }
1371
1372 // CHECK-LABEL: @test_vcagt_f64(
1373 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1374 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1375 // CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1376 // CHECK: ret <1 x i64> [[VCAGT_V2_I]]
test_vcagt_f64(float64x1_t a,float64x1_t b)1377 uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
1378 return vcagt_f64(a, b);
1379 }
1380
1381 // CHECK-LABEL: @test_vcagtq_f32(
1382 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1383 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1384 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1385 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
test_vcagtq_f32(float32x4_t v1,float32x4_t v2)1386 uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
1387 return vcagtq_f32(v1, v2);
1388 }
1389
1390 // CHECK-LABEL: @test_vcagtq_f64(
1391 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1392 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1393 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1394 // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
test_vcagtq_f64(float64x2_t v1,float64x2_t v2)1395 uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
1396 return vcagtq_f64(v1, v2);
1397 }
1398
// vcale/vcaleq (absolute compare less-or-equal) and vcalt/vcaltq (absolute
// compare less-than): there is no dedicated facle/faclt intrinsic, so these
// must lower to facge/facgt with the OPERANDS SWAPPED (a <= b is b >= a).
// The CHECK lines therefore expect %v2/%b as the first intrinsic argument.

// CHECK-LABEL: @test_vcale_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
// CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0, v1 are possible, but would be odd.
}

// CHECK-LABEL: @test_vcale_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
// CHECK: ret <1 x i64> [[VCALE_V2_I]]
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}

// CHECK-LABEL: @test_vcaleq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0, v1 are possible, but would be odd.
}

// CHECK-LABEL: @test_vcaleq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
// CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0, v1 are possible, but would be odd.
}

// CHECK-LABEL: @test_vcalt_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
// CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0, v1 are possible, but would be odd.
}

// CHECK-LABEL: @test_vcalt_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
// CHECK: ret <1 x i64> [[VCALT_V2_I]]
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}

// CHECK-LABEL: @test_vcaltq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0, v1 are possible, but would be odd.
}

// CHECK-LABEL: @test_vcaltq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
// CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 are possible, but would be odd.
}
1476
// vtst/vtstq (vector test bits): each wrapper must lower to the plain-IR
// sequence and + icmp ne 0 + sext (no target intrinsic). For non-i8 element
// types the leading bitcasts to <N x i8> are expected header plumbing.

// CHECK-LABEL: @test_vtst_s8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}

// CHECK-LABEL: @test_vtst_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}

// CHECK-LABEL: @test_vtst_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}

// CHECK-LABEL: @test_vtst_u8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}

// CHECK-LABEL: @test_vtst_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}

// CHECK-LABEL: @test_vtst_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vtstq_u64(v1, v2);
}

// CHECK-LABEL: @test_vtst_p8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
  return vtst_p8(v1, v2);
}

// CHECK-LABEL: @test_vtst_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
  return vtst_p16(v1, v2);
}

// CHECK-LABEL: @test_vtstq_p8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vtstq_p8(v1, v2);
}

// CHECK-LABEL: @test_vtstq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
  return vtstq_p16(v1, v2);
}

// CHECK-LABEL: @test_vtst_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}

// CHECK-LABEL: @test_vtst_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}
1684
// vceq/vceqq (compare equal): each must lower to a plain icmp eq (integer
// and polynomial types) or fcmp oeq (floating point), followed by a sext of
// the i1 mask to the full element width — no target intrinsic.

// CHECK-LABEL: @test_vceq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}

// CHECK-LABEL: @test_vceq_s16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}

// CHECK-LABEL: @test_vceq_s32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}

// CHECK-LABEL: @test_vceq_s64(
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}

// CHECK-LABEL: @test_vceq_u64(
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}

// CHECK-LABEL: @test_vceq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}

// CHECK-LABEL: @test_vceq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}

// CHECK-LABEL: @test_vceq_u8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}

// CHECK-LABEL: @test_vceq_u16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}

// CHECK-LABEL: @test_vceq_u32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}

// CHECK-LABEL: @test_vceq_p8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s16(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s32(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}

// CHECK-LABEL: @test_vceqq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u16(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vceqq_u16(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u32(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vceqq_u32(v1, v2);
}

// CHECK-LABEL: @test_vceqq_p8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vceqq_p8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s64(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
  return vceqq_s64(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u64(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vceqq_u64(v1, v2);
}

// CHECK-LABEL: @test_vceqq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
  return vceqq_f64(v1, v2);
}
1860
// vcge/vcgeq (compare greater-or-equal): signed element types must lower to
// icmp sge, unsigned to icmp uge, floating point to fcmp oge — each followed
// by a sext of the i1 mask to the full element width.

// CHECK-LABEL: @test_vcge_s8(
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v1, v2);
}

// CHECK-LABEL: @test_vcge_s16(
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
  return vcge_s16(v1, v2);
}

// CHECK-LABEL: @test_vcge_s32(
// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
  return vcge_s32(v1, v2);
}

// CHECK-LABEL: @test_vcge_s64(
// CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
  return vcge_s64(a, b);
}

// CHECK-LABEL: @test_vcge_u64(
// CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
  return vcge_u64(a, b);
}

// CHECK-LABEL: @test_vcge_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
  return vcge_f32(v1, v2);
}

// CHECK-LABEL: @test_vcge_f64(
// CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
  return vcge_f64(a, b);
}

// CHECK-LABEL: @test_vcge_u8(
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcge_u8(v1, v2);
}

// CHECK-LABEL: @test_vcge_u16(
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcge_u16(v1, v2);
}

// CHECK-LABEL: @test_vcge_u32(
// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcge_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgeq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgeq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgeq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgeq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u8(
// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgeq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u16(
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgeq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u32(
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgeq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s64(
// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgeq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u64(
// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgeq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgeq_f64(v1, v2);
}
2020
// CHECK-LABEL: @test_vcle_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vcle:
// At the assembly level LE is implemented as GE with the operands reversed,
// but these IR-level checks see the compare emitted directly as 'sle'/'ole'
// with the operands in source order.
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: @test_vcle_s64(
// CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: @test_vcle_u64(
// CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: @test_vcle_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}
2103
// vcleq_*: 128-bit (quad) less-than-or-equal compares; same sext-of-i1 mask
// pattern as the 64-bit vcle_* tests above.
// CHECK-LABEL: @test_vcleq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s64(
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u64(
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}
2183
// vcgt_*: 64-bit greater-than compares (strict); signed, unsigned, and
// floating-point variants.
// CHECK-LABEL: @test_vcgt_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s64(
// CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}

// CHECK-LABEL: @test_vcgt_u64(
// CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}

// CHECK-LABEL: @test_vcgt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgt_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}

// CHECK-LABEL: @test_vcgt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}
2263
// vcgtq_*: 128-bit (quad) greater-than compares.
// CHECK-LABEL: @test_vcgtq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s64(
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u64(
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}
2343
// CHECK-LABEL: @test_vclt_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// At the assembly level LT is implemented as GT with the operands reversed,
// but these IR-level checks see the compare emitted directly as 'slt'/'olt'
// with the operands in source order.
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: @test_vclt_s64(
// CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: @test_vclt_u64(
// CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: @test_vclt_f64(
// CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}
2426
// vcltq_*: 128-bit (quad) less-than compares.
// CHECK-LABEL: @test_vcltq_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s64(
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u64(
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}
2506
// vhadd_*: 64-bit halving add; lowered to the llvm.aarch64.neon.{s,u}hadd
// intrinsics rather than plain IR arithmetic.
// CHECK-LABEL: @test_vhadd_s8(
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}
2560
// vhaddq_*: 128-bit (quad) halving add variants.
// CHECK-LABEL: @test_vhaddq_s8(
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}
2614
// vhsub_*: 64-bit halving subtract; lowered to llvm.aarch64.neon.{s,u}hsub.
// CHECK-LABEL: @test_vhsub_s8(
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}
2668
// vhsubq_*: 128-bit (quad) halving subtract variants.
// CHECK-LABEL: @test_vhsubq_s8(
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}
2722
// NOTE(review): codegen tests for the NEON rounding-halving-add intrinsics
// (vrhadd_* 64-bit, vrhaddq_* 128-bit; signed variants lower to
// @llvm.aarch64.neon.srhadd.*, unsigned to .urhadd.*). The CHECK lines also
// record the <N x i8> argument/result bitcasts clang emits for element types
// wider than i8. These comments are FileCheck patterns — do not edit them.
2723 // CHECK-LABEL: @test_vrhadd_s8(
2724 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2725 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
test_vrhadd_s8(int8x8_t v1,int8x8_t v2)2726 int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
2727 return vrhadd_s8(v1, v2);
2728 }
2729
2730 // CHECK-LABEL: @test_vrhadd_s16(
2731 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2732 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2733 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2734 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
2735 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
test_vrhadd_s16(int16x4_t v1,int16x4_t v2)2736 int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
2737 return vrhadd_s16(v1, v2);
2738 }
2739
2740 // CHECK-LABEL: @test_vrhadd_s32(
2741 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2742 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2743 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2744 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
2745 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
test_vrhadd_s32(int32x2_t v1,int32x2_t v2)2746 int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
2747 return vrhadd_s32(v1, v2);
2748 }
2749
2750 // CHECK-LABEL: @test_vrhadd_u8(
2751 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2752 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
test_vrhadd_u8(uint8x8_t v1,uint8x8_t v2)2753 uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
2754 return vrhadd_u8(v1, v2);
2755 }
2756
2757 // CHECK-LABEL: @test_vrhadd_u16(
2758 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2759 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2760 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2761 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
2762 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
test_vrhadd_u16(uint16x4_t v1,uint16x4_t v2)2763 uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
2764 return vrhadd_u16(v1, v2);
2765 }
2766
2767 // CHECK-LABEL: @test_vrhadd_u32(
2768 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2769 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2770 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2771 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
2772 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
test_vrhadd_u32(uint32x2_t v1,uint32x2_t v2)2773 uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
2774 return vrhadd_u32(v1, v2);
2775 }
2776
2777 // CHECK-LABEL: @test_vrhaddq_s8(
2778 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2779 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_s8(int8x16_t v1,int8x16_t v2)2780 int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
2781 return vrhaddq_s8(v1, v2);
2782 }
2783
2784 // CHECK-LABEL: @test_vrhaddq_s16(
2785 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2786 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2787 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2788 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
2789 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_s16(int16x8_t v1,int16x8_t v2)2790 int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
2791 return vrhaddq_s16(v1, v2);
2792 }
2793
2794 // CHECK-LABEL: @test_vrhaddq_s32(
2795 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2796 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2797 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2798 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
2799 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_s32(int32x4_t v1,int32x4_t v2)2800 int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
2801 return vrhaddq_s32(v1, v2);
2802 }
2803
2804 // CHECK-LABEL: @test_vrhaddq_u8(
2805 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2806 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_u8(uint8x16_t v1,uint8x16_t v2)2807 uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
2808 return vrhaddq_u8(v1, v2);
2809 }
2810
2811 // CHECK-LABEL: @test_vrhaddq_u16(
2812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2813 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2814 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2815 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
2816 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_u16(uint16x8_t v1,uint16x8_t v2)2817 uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
2818 return vrhaddq_u16(v1, v2);
2819 }
2820
2821 // CHECK-LABEL: @test_vrhaddq_u32(
2822 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2823 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2824 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2825 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
2826 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_u32(uint32x4_t v1,uint32x4_t v2)2827 uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
2828 return vrhaddq_u32(v1, v2);
2829 }
2830
// NOTE(review): codegen tests for the NEON saturating-add intrinsics
// (vqadd_* 64-bit, vqaddq_* 128-bit; signed -> @llvm.aarch64.neon.sqadd.*,
// unsigned -> .uqadd.*). Unlike the families above, these also cover the
// 64-bit-element variants (v1i64 / v2i64). Parameter names switch from
// v1/v2 to a/b here and the CHECK patterns depend on that — keep them in sync.
2831 // CHECK-LABEL: @test_vqadd_s8(
2832 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
2833 // CHECK: ret <8 x i8> [[VQADD_V_I]]
test_vqadd_s8(int8x8_t a,int8x8_t b)2834 int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
2835 return vqadd_s8(a, b);
2836 }
2837
2838 // CHECK-LABEL: @test_vqadd_s16(
2839 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2840 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2841 // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
2842 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
2843 // CHECK: ret <4 x i16> [[VQADD_V2_I]]
test_vqadd_s16(int16x4_t a,int16x4_t b)2844 int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
2845 return vqadd_s16(a, b);
2846 }
2847
2848 // CHECK-LABEL: @test_vqadd_s32(
2849 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2850 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2851 // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
2852 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
2853 // CHECK: ret <2 x i32> [[VQADD_V2_I]]
test_vqadd_s32(int32x2_t a,int32x2_t b)2854 int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
2855 return vqadd_s32(a, b);
2856 }
2857
2858 // CHECK-LABEL: @test_vqadd_s64(
2859 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
2860 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
2861 // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
2862 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
2863 // CHECK: ret <1 x i64> [[VQADD_V2_I]]
test_vqadd_s64(int64x1_t a,int64x1_t b)2864 int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
2865 return vqadd_s64(a, b);
2866 }
2867
2868 // CHECK-LABEL: @test_vqadd_u8(
2869 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
2870 // CHECK: ret <8 x i8> [[VQADD_V_I]]
test_vqadd_u8(uint8x8_t a,uint8x8_t b)2871 uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
2872 return vqadd_u8(a, b);
2873 }
2874
2875 // CHECK-LABEL: @test_vqadd_u16(
2876 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2877 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2878 // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
2879 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
2880 // CHECK: ret <4 x i16> [[VQADD_V2_I]]
test_vqadd_u16(uint16x4_t a,uint16x4_t b)2881 uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
2882 return vqadd_u16(a, b);
2883 }
2884
2885 // CHECK-LABEL: @test_vqadd_u32(
2886 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2887 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2888 // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
2889 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
2890 // CHECK: ret <2 x i32> [[VQADD_V2_I]]
test_vqadd_u32(uint32x2_t a,uint32x2_t b)2891 uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
2892 return vqadd_u32(a, b);
2893 }
2894
2895 // CHECK-LABEL: @test_vqadd_u64(
2896 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
2897 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
2898 // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
2899 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
2900 // CHECK: ret <1 x i64> [[VQADD_V2_I]]
test_vqadd_u64(uint64x1_t a,uint64x1_t b)2901 uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
2902 return vqadd_u64(a, b);
2903 }
2904
2905 // CHECK-LABEL: @test_vqaddq_s8(
2906 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
2907 // CHECK: ret <16 x i8> [[VQADDQ_V_I]]
test_vqaddq_s8(int8x16_t a,int8x16_t b)2908 int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
2909 return vqaddq_s8(a, b);
2910 }
2911
2912 // CHECK-LABEL: @test_vqaddq_s16(
2913 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2914 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2915 // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
2916 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
2917 // CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
test_vqaddq_s16(int16x8_t a,int16x8_t b)2918 int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
2919 return vqaddq_s16(a, b);
2920 }
2921
2922 // CHECK-LABEL: @test_vqaddq_s32(
2923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2925 // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
2926 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
2927 // CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
test_vqaddq_s32(int32x4_t a,int32x4_t b)2928 int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
2929 return vqaddq_s32(a, b);
2930 }
2931
2932 // CHECK-LABEL: @test_vqaddq_s64(
2933 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
2934 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
2935 // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
2936 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
2937 // CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
test_vqaddq_s64(int64x2_t a,int64x2_t b)2938 int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
2939 return vqaddq_s64(a, b);
2940 }
2941
2942 // CHECK-LABEL: @test_vqaddq_u8(
2943 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
2944 // CHECK: ret <16 x i8> [[VQADDQ_V_I]]
test_vqaddq_u8(uint8x16_t a,uint8x16_t b)2945 uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
2946 return vqaddq_u8(a, b);
2947 }
2948
2949 // CHECK-LABEL: @test_vqaddq_u16(
2950 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2951 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2952 // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
2953 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
2954 // CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
test_vqaddq_u16(uint16x8_t a,uint16x8_t b)2955 uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
2956 return vqaddq_u16(a, b);
2957 }
2958
2959 // CHECK-LABEL: @test_vqaddq_u32(
2960 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2961 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2962 // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
2963 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
2964 // CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
test_vqaddq_u32(uint32x4_t a,uint32x4_t b)2965 uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
2966 return vqaddq_u32(a, b);
2967 }
2968
2969 // CHECK-LABEL: @test_vqaddq_u64(
2970 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
2971 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
2972 // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
2973 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
2974 // CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
test_vqaddq_u64(uint64x2_t a,uint64x2_t b)2975 uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
2976 return vqaddq_u64(a, b);
2977 }
2978
// NOTE(review): codegen tests for the NEON saturating-subtract intrinsics
// (vqsub_* 64-bit, vqsubq_* 128-bit; signed -> @llvm.aarch64.neon.sqsub.*,
// unsigned -> .uqsub.*). Structure mirrors the vqadd family above: i8 element
// variants check only the call and ret; wider element types also check the
// <N x i8> bitcasts clang emits around the call. CHECK text is load-bearing.
2979 // CHECK-LABEL: @test_vqsub_s8(
2980 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
2981 // CHECK: ret <8 x i8> [[VQSUB_V_I]]
test_vqsub_s8(int8x8_t a,int8x8_t b)2982 int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
2983 return vqsub_s8(a, b);
2984 }
2985
2986 // CHECK-LABEL: @test_vqsub_s16(
2987 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2988 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2989 // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
2990 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
2991 // CHECK: ret <4 x i16> [[VQSUB_V2_I]]
test_vqsub_s16(int16x4_t a,int16x4_t b)2992 int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
2993 return vqsub_s16(a, b);
2994 }
2995
2996 // CHECK-LABEL: @test_vqsub_s32(
2997 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2998 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2999 // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
3000 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
3001 // CHECK: ret <2 x i32> [[VQSUB_V2_I]]
test_vqsub_s32(int32x2_t a,int32x2_t b)3002 int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
3003 return vqsub_s32(a, b);
3004 }
3005
3006 // CHECK-LABEL: @test_vqsub_s64(
3007 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3008 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3009 // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
3010 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
3011 // CHECK: ret <1 x i64> [[VQSUB_V2_I]]
test_vqsub_s64(int64x1_t a,int64x1_t b)3012 int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
3013 return vqsub_s64(a, b);
3014 }
3015
3016 // CHECK-LABEL: @test_vqsub_u8(
3017 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
3018 // CHECK: ret <8 x i8> [[VQSUB_V_I]]
test_vqsub_u8(uint8x8_t a,uint8x8_t b)3019 uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
3020 return vqsub_u8(a, b);
3021 }
3022
3023 // CHECK-LABEL: @test_vqsub_u16(
3024 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3025 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3026 // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
3027 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
3028 // CHECK: ret <4 x i16> [[VQSUB_V2_I]]
test_vqsub_u16(uint16x4_t a,uint16x4_t b)3029 uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
3030 return vqsub_u16(a, b);
3031 }
3032
3033 // CHECK-LABEL: @test_vqsub_u32(
3034 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3035 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3036 // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
3037 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
3038 // CHECK: ret <2 x i32> [[VQSUB_V2_I]]
test_vqsub_u32(uint32x2_t a,uint32x2_t b)3039 uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
3040 return vqsub_u32(a, b);
3041 }
3042
3043 // CHECK-LABEL: @test_vqsub_u64(
3044 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3045 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3046 // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
3047 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
3048 // CHECK: ret <1 x i64> [[VQSUB_V2_I]]
test_vqsub_u64(uint64x1_t a,uint64x1_t b)3049 uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
3050 return vqsub_u64(a, b);
3051 }
3052
3053 // CHECK-LABEL: @test_vqsubq_s8(
3054 // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
3055 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
test_vqsubq_s8(int8x16_t a,int8x16_t b)3056 int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
3057 return vqsubq_s8(a, b);
3058 }
3059
3060 // CHECK-LABEL: @test_vqsubq_s16(
3061 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3062 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3063 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
3064 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
3065 // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
test_vqsubq_s16(int16x8_t a,int16x8_t b)3066 int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
3067 return vqsubq_s16(a, b);
3068 }
3069
3070 // CHECK-LABEL: @test_vqsubq_s32(
3071 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3072 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3073 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
3074 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
3075 // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
test_vqsubq_s32(int32x4_t a,int32x4_t b)3076 int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
3077 return vqsubq_s32(a, b);
3078 }
3079
3080 // CHECK-LABEL: @test_vqsubq_s64(
3081 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3082 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3083 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
3084 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
3085 // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
test_vqsubq_s64(int64x2_t a,int64x2_t b)3086 int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
3087 return vqsubq_s64(a, b);
3088 }
3089
3090 // CHECK-LABEL: @test_vqsubq_u8(
3091 // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
3092 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
test_vqsubq_u8(uint8x16_t a,uint8x16_t b)3093 uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
3094 return vqsubq_u8(a, b);
3095 }
3096
3097 // CHECK-LABEL: @test_vqsubq_u16(
3098 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3099 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3100 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
3101 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
3102 // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
test_vqsubq_u16(uint16x8_t a,uint16x8_t b)3103 uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
3104 return vqsubq_u16(a, b);
3105 }
3106
3107 // CHECK-LABEL: @test_vqsubq_u32(
3108 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3109 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3110 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
3111 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
3112 // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
test_vqsubq_u32(uint32x4_t a,uint32x4_t b)3113 uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
3114 return vqsubq_u32(a, b);
3115 }
3116
3117 // CHECK-LABEL: @test_vqsubq_u64(
3118 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3119 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3120 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
3121 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
3122 // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
test_vqsubq_u64(uint64x2_t a,uint64x2_t b)3123 uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
3124 return vqsubq_u64(a, b);
3125 }
3126
// NOTE(review): codegen tests for the NEON register-shift intrinsics
// (vshl_* 64-bit, vshlq_* 128-bit; signed -> @llvm.aarch64.neon.sshl.*,
// unsigned -> .ushl.*). Note the signatures: for the unsigned variants the
// shifted operand `a` is unsigned but the per-lane shift amount `b` is a
// SIGNED vector (e.g. test_vshl_u8 takes uint8x8_t a, int8x8_t b) — this
// matches the vshl_u* intrinsic prototypes and is intentional, not a typo.
3127 // CHECK-LABEL: @test_vshl_s8(
3128 // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3129 // CHECK: ret <8 x i8> [[VSHL_V_I]]
test_vshl_s8(int8x8_t a,int8x8_t b)3130 int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
3131 return vshl_s8(a, b);
3132 }
3133
3134 // CHECK-LABEL: @test_vshl_s16(
3135 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3136 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3137 // CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3138 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
3139 // CHECK: ret <4 x i16> [[VSHL_V2_I]]
test_vshl_s16(int16x4_t a,int16x4_t b)3140 int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
3141 return vshl_s16(a, b);
3142 }
3143
3144 // CHECK-LABEL: @test_vshl_s32(
3145 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3146 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3147 // CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3148 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
3149 // CHECK: ret <2 x i32> [[VSHL_V2_I]]
test_vshl_s32(int32x2_t a,int32x2_t b)3150 int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
3151 return vshl_s32(a, b);
3152 }
3153
3154 // CHECK-LABEL: @test_vshl_s64(
3155 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3156 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3157 // CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3158 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
3159 // CHECK: ret <1 x i64> [[VSHL_V2_I]]
test_vshl_s64(int64x1_t a,int64x1_t b)3160 int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
3161 return vshl_s64(a, b);
3162 }
3163
3164 // CHECK-LABEL: @test_vshl_u8(
3165 // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
3166 // CHECK: ret <8 x i8> [[VSHL_V_I]]
test_vshl_u8(uint8x8_t a,int8x8_t b)3167 uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
3168 return vshl_u8(a, b);
3169 }
3170
3171 // CHECK-LABEL: @test_vshl_u16(
3172 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3173 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3174 // CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
3175 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
3176 // CHECK: ret <4 x i16> [[VSHL_V2_I]]
test_vshl_u16(uint16x4_t a,int16x4_t b)3177 uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
3178 return vshl_u16(a, b);
3179 }
3180
3181 // CHECK-LABEL: @test_vshl_u32(
3182 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3183 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3184 // CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
3185 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
3186 // CHECK: ret <2 x i32> [[VSHL_V2_I]]
test_vshl_u32(uint32x2_t a,int32x2_t b)3187 uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
3188 return vshl_u32(a, b);
3189 }
3190
3191 // CHECK-LABEL: @test_vshl_u64(
3192 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3193 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3194 // CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
3195 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
3196 // CHECK: ret <1 x i64> [[VSHL_V2_I]]
test_vshl_u64(uint64x1_t a,int64x1_t b)3197 uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
3198 return vshl_u64(a, b);
3199 }
3200
3201 // CHECK-LABEL: @test_vshlq_s8(
3202 // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3203 // CHECK: ret <16 x i8> [[VSHLQ_V_I]]
test_vshlq_s8(int8x16_t a,int8x16_t b)3204 int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
3205 return vshlq_s8(a, b);
3206 }
3207
3208 // CHECK-LABEL: @test_vshlq_s16(
3209 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3210 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3211 // CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3212 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3213 // CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
test_vshlq_s16(int16x8_t a,int16x8_t b)3214 int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
3215 return vshlq_s16(a, b);
3216 }
3217
3218 // CHECK-LABEL: @test_vshlq_s32(
3219 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3220 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3221 // CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3222 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3223 // CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
test_vshlq_s32(int32x4_t a,int32x4_t b)3224 int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
3225 return vshlq_s32(a, b);
3226 }
3227
3228 // CHECK-LABEL: @test_vshlq_s64(
3229 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3230 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3231 // CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3232 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3233 // CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
test_vshlq_s64(int64x2_t a,int64x2_t b)3234 int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
3235 return vshlq_s64(a, b);
3236 }
3237
3238 // CHECK-LABEL: @test_vshlq_u8(
3239 // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
3240 // CHECK: ret <16 x i8> [[VSHLQ_V_I]]
test_vshlq_u8(uint8x16_t a,int8x16_t b)3241 uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
3242 return vshlq_u8(a, b);
3243 }
3244
3245 // CHECK-LABEL: @test_vshlq_u16(
3246 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3247 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3248 // CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
3249 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3250 // CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
test_vshlq_u16(uint16x8_t a,int16x8_t b)3251 uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
3252 return vshlq_u16(a, b);
3253 }
3254
3255 // CHECK-LABEL: @test_vshlq_u32(
3256 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3257 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3258 // CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
3259 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3260 // CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
test_vshlq_u32(uint32x4_t a,int32x4_t b)3261 uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
3262 return vshlq_u32(a, b);
3263 }
3264
3265 // CHECK-LABEL: @test_vshlq_u64(
3266 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3267 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3268 // CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
3269 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3270 // CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
test_vshlq_u64(uint64x2_t a,int64x2_t b)3271 uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
3272 return vshlq_u64(a, b);
3273 }
3274
// NOTE(review): codegen tests for the NEON saturating-shift intrinsics
// (vqshl_*; signed -> @llvm.aarch64.neon.sqshl.*, unsigned -> .uqshl.*).
// As with vshl above, the unsigned variants shift an unsigned vector by a
// SIGNED per-lane shift-amount vector (see test_vqshl_u8/u16 signatures).
// The CHECK comments are FileCheck patterns pinning clang's IR — keep them
// and the wrapper bodies byte-stable.
3275 // CHECK-LABEL: @test_vqshl_s8(
3276 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3277 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
test_vqshl_s8(int8x8_t a,int8x8_t b)3278 int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
3279 return vqshl_s8(a, b);
3280 }
3281
3282 // CHECK-LABEL: @test_vqshl_s16(
3283 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3284 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3285 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3286 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3287 // CHECK: ret <4 x i16> [[VQSHL_V2_I]]
test_vqshl_s16(int16x4_t a,int16x4_t b)3288 int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
3289 return vqshl_s16(a, b);
3290 }
3291
3292 // CHECK-LABEL: @test_vqshl_s32(
3293 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3295 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3296 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3297 // CHECK: ret <2 x i32> [[VQSHL_V2_I]]
test_vqshl_s32(int32x2_t a,int32x2_t b)3298 int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
3299 return vqshl_s32(a, b);
3300 }
3301
3302 // CHECK-LABEL: @test_vqshl_s64(
3303 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3304 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3305 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3306 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3307 // CHECK: ret <1 x i64> [[VQSHL_V2_I]]
test_vqshl_s64(int64x1_t a,int64x1_t b)3308 int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
3309 return vqshl_s64(a, b);
3310 }
3311
3312 // CHECK-LABEL: @test_vqshl_u8(
3313 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3314 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
test_vqshl_u8(uint8x8_t a,int8x8_t b)3315 uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
3316 return vqshl_u8(a, b);
3317 }
3318
3319 // CHECK-LABEL: @test_vqshl_u16(
3320 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3321 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3322 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3323 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3324 // CHECK: ret <4 x i16> [[VQSHL_V2_I]]
test_vqshl_u16(uint16x4_t a,int16x4_t b)3325 uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
3326 return vqshl_u16(a, b);
3327 }
3328
3329 // CHECK-LABEL: @test_vqshl_u32(
3330 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3331 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3332 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3333 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3334 // CHECK: ret <2 x i32> [[VQSHL_V2_I]]
test_vqshl_u32(uint32x2_t a,int32x2_t b)3335 uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
3336 return vqshl_u32(a, b);
3337 }
3338
3339 // CHECK-LABEL: @test_vqshl_u64(
3340 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3341 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3342 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3343 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3344 // CHECK: ret <1 x i64> [[VQSHL_V2_I]]
test_vqshl_u64(uint64x1_t a,int64x1_t b)3345 uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
3346 return vqshl_u64(a, b);
3347 }
3348
// Saturating shift left (SQSHL/UQSHL), 128-bit vectors.
// CHECK-LABEL: @test_vqshlq_s8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
3422
// Rounding shift left (SRSHL/URSHL), 64-bit vectors: lowers to
// llvm.aarch64.neon.{s,u}rshl.
// CHECK-LABEL: @test_vrshl_s8(
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}
3496
// Rounding shift left (SRSHL/URSHL), 128-bit vectors.
// CHECK-LABEL: @test_vrshlq_s8(
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}
3570
// Saturating rounding shift left (SQRSHL/UQRSHL), 64-bit vectors: lowers to
// llvm.aarch64.neon.{s,u}qrshl.
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}
3644
// Saturating rounding shift left (SQRSHL/UQRSHL), 128-bit vectors.
// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
3718
// Shift left and insert (SLI) on poly64 vectors with immediate shift 0.
// CHECK-LABEL: @test_vsli_n_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: @test_vsliq_n_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}
3740
// Element-wise maximum (SMAX/UMAX/FMAX), 64-bit vectors: lowers to
// llvm.aarch64.neon.{s,u,f}max.
// CHECK-LABEL: @test_vmax_s8(
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
3799
// Element-wise maximum (SMAX/UMAX/FMAX), 128-bit vectors.
// CHECK-LABEL: @test_vmaxq_s8(
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}
3867
3868 // CHECK-LABEL: @test_vmin_s8(
3869 // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
3870 // CHECK: ret <8 x i8> [[VMIN_I]]
test_vmin_s8(int8x8_t a,int8x8_t b)3871 int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
3872 return vmin_s8(a, b);
3873 }
3874
3875 // CHECK-LABEL: @test_vmin_s16(
3876 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3877 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3878 // CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
3879 // CHECK: ret <4 x i16> [[VMIN2_I]]
test_vmin_s16(int16x4_t a,int16x4_t b)3880 int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
3881 return vmin_s16(a, b);
3882 }
3883
3884 // CHECK-LABEL: @test_vmin_s32(
3885 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3886 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3887 // CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
3888 // CHECK: ret <2 x i32> [[VMIN2_I]]
test_vmin_s32(int32x2_t a,int32x2_t b)3889 int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
3890 return vmin_s32(a, b);
3891 }
3892
3893 // CHECK-LABEL: @test_vmin_u8(
3894 // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
3895 // CHECK: ret <8 x i8> [[VMIN_I]]
test_vmin_u8(uint8x8_t a,uint8x8_t b)3896 uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
3897 return vmin_u8(a, b);
3898 }
3899
3900 // CHECK-LABEL: @test_vmin_u16(
3901 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3902 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3903 // CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
3904 // CHECK: ret <4 x i16> [[VMIN2_I]]
test_vmin_u16(uint16x4_t a,uint16x4_t b)3905 uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
3906 return vmin_u16(a, b);
3907 }
3908
// vmin_u32 -> @llvm.aarch64.neon.umin.v2i32.
// CHECK-LABEL: @test_vmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}
3917
// vmin_f32: element-wise float minimum -> @llvm.aarch64.neon.fmin.v2f32.
// CHECK-LABEL: @test_vmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}
3926
// 128-bit variant: vminq_s8 -> @llvm.aarch64.neon.smin.v16i8.
// CHECK-LABEL: @test_vminq_s8(
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}
3933
// vminq_s16 -> @llvm.aarch64.neon.smin.v8i16.
// CHECK-LABEL: @test_vminq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}
3942
// vminq_s32 -> @llvm.aarch64.neon.smin.v4i32.
// CHECK-LABEL: @test_vminq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}
3951
// vminq_u8 -> @llvm.aarch64.neon.umin.v16i8.
// CHECK-LABEL: @test_vminq_u8(
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}
3958
// vminq_u16 -> @llvm.aarch64.neon.umin.v8i16.
// CHECK-LABEL: @test_vminq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}
3967
// vminq_u32 -> @llvm.aarch64.neon.umin.v4i32.
// CHECK-LABEL: @test_vminq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}
3976
// vminq_f32 -> @llvm.aarch64.neon.fmin.v4f32.
// CHECK-LABEL: @test_vminq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}
3985
// vminq_f64 (AArch64-only double vector) -> @llvm.aarch64.neon.fmin.v2f64.
// CHECK-LABEL: @test_vminq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}
3994
// vmaxnm_f32 -> @llvm.aarch64.neon.fmaxnm.v2f32 (maxNum-style maximum).
// CHECK-LABEL: @test_vmaxnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}
4003
// vmaxnmq_f32 -> @llvm.aarch64.neon.fmaxnm.v4f32.
// CHECK-LABEL: @test_vmaxnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}
4012
// vmaxnmq_f64 -> @llvm.aarch64.neon.fmaxnm.v2f64.
// CHECK-LABEL: @test_vmaxnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}
4021
// vminnm_f32 -> @llvm.aarch64.neon.fminnm.v2f32 (minNum-style minimum).
// CHECK-LABEL: @test_vminnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}
4030
// vminnmq_f32 -> @llvm.aarch64.neon.fminnm.v4f32.
// CHECK-LABEL: @test_vminnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}
4039
// vminnmq_f64 -> @llvm.aarch64.neon.fminnm.v2f64.
// CHECK-LABEL: @test_vminnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
4048
// vpmax_s8: pairwise signed maximum -> @llvm.aarch64.neon.smaxp.v8i8.
// CHECK-LABEL: @test_vpmax_s8(
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}
4055
// vpmax_s16 -> @llvm.aarch64.neon.smaxp.v4i16.
// CHECK-LABEL: @test_vpmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}
4064
// vpmax_s32 -> @llvm.aarch64.neon.smaxp.v2i32.
// CHECK-LABEL: @test_vpmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}
4073
// vpmax_u8: pairwise unsigned maximum -> @llvm.aarch64.neon.umaxp.v8i8.
// CHECK-LABEL: @test_vpmax_u8(
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}
4080
// vpmax_u16 -> @llvm.aarch64.neon.umaxp.v4i16.
// CHECK-LABEL: @test_vpmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}
4089
// vpmax_u32 -> @llvm.aarch64.neon.umaxp.v2i32.
// CHECK-LABEL: @test_vpmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}
4098
// vpmax_f32: pairwise float maximum -> @llvm.aarch64.neon.fmaxp.v2f32.
// CHECK-LABEL: @test_vpmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}
4107
// vpmaxq_s8 -> @llvm.aarch64.neon.smaxp.v16i8.
// CHECK-LABEL: @test_vpmaxq_s8(
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}
4114
// vpmaxq_s16 -> @llvm.aarch64.neon.smaxp.v8i16.
// CHECK-LABEL: @test_vpmaxq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}
4123
// vpmaxq_s32 -> @llvm.aarch64.neon.smaxp.v4i32.
// CHECK-LABEL: @test_vpmaxq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}
4132
// vpmaxq_u8 -> @llvm.aarch64.neon.umaxp.v16i8.
// CHECK-LABEL: @test_vpmaxq_u8(
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}
4139
// vpmaxq_u16 -> @llvm.aarch64.neon.umaxp.v8i16.
// CHECK-LABEL: @test_vpmaxq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}
4148
// vpmaxq_u32 -> @llvm.aarch64.neon.umaxp.v4i32.
// CHECK-LABEL: @test_vpmaxq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}
4157
// vpmaxq_f32 -> @llvm.aarch64.neon.fmaxp.v4f32.
// CHECK-LABEL: @test_vpmaxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}
4166
// vpmaxq_f64 -> @llvm.aarch64.neon.fmaxp.v2f64.
// CHECK-LABEL: @test_vpmaxq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}
4175
// vpmin_s8: pairwise signed minimum -> @llvm.aarch64.neon.sminp.v8i8.
// CHECK-LABEL: @test_vpmin_s8(
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}
4182
// vpmin_s16 -> @llvm.aarch64.neon.sminp.v4i16.
// CHECK-LABEL: @test_vpmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}
4191
// vpmin_s32 -> @llvm.aarch64.neon.sminp.v2i32.
// CHECK-LABEL: @test_vpmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}
4200
// vpmin_u8: pairwise unsigned minimum -> @llvm.aarch64.neon.uminp.v8i8.
// CHECK-LABEL: @test_vpmin_u8(
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}
4207
// vpmin_u16 -> @llvm.aarch64.neon.uminp.v4i16.
// CHECK-LABEL: @test_vpmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}
4216
// vpmin_u32 -> @llvm.aarch64.neon.uminp.v2i32.
// CHECK-LABEL: @test_vpmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}
4225
// vpmin_f32: pairwise float minimum -> @llvm.aarch64.neon.fminp.v2f32.
// CHECK-LABEL: @test_vpmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}
4234
// vpminq_s8 -> @llvm.aarch64.neon.sminp.v16i8.
// CHECK-LABEL: @test_vpminq_s8(
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}
4241
// vpminq_s16 -> @llvm.aarch64.neon.sminp.v8i16.
// CHECK-LABEL: @test_vpminq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}
4250
// vpminq_s32 -> @llvm.aarch64.neon.sminp.v4i32.
// CHECK-LABEL: @test_vpminq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}
4259
// vpminq_u8 -> @llvm.aarch64.neon.uminp.v16i8.
// CHECK-LABEL: @test_vpminq_u8(
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}
4266
// vpminq_u16 -> @llvm.aarch64.neon.uminp.v8i16.
// CHECK-LABEL: @test_vpminq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}
4275
// vpminq_u32 -> @llvm.aarch64.neon.uminp.v4i32.
// CHECK-LABEL: @test_vpminq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}
4284
// vpminq_f32 -> @llvm.aarch64.neon.fminp.v4f32.
// CHECK-LABEL: @test_vpminq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}
4293
// vpminq_f64 -> @llvm.aarch64.neon.fminp.v2f64.
// CHECK-LABEL: @test_vpminq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}
4302
// vpmaxnm_f32: pairwise maxNum-style maximum -> @llvm.aarch64.neon.fmaxnmp.v2f32.
// CHECK-LABEL: @test_vpmaxnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}
4311
// vpmaxnmq_f32 -> @llvm.aarch64.neon.fmaxnmp.v4f32.
// CHECK-LABEL: @test_vpmaxnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}
4320
// vpmaxnmq_f64 -> @llvm.aarch64.neon.fmaxnmp.v2f64.
// CHECK-LABEL: @test_vpmaxnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}
4329
// vpminnm_f32: pairwise minNum-style minimum -> @llvm.aarch64.neon.fminnmp.v2f32.
// CHECK-LABEL: @test_vpminnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}
4338
// vpminnmq_f32 -> @llvm.aarch64.neon.fminnmp.v4f32.
// CHECK-LABEL: @test_vpminnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}
4347
// vpminnmq_f64 -> @llvm.aarch64.neon.fminnmp.v2f64.
// CHECK-LABEL: @test_vpminnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}
4356
// vpadd_s8: pairwise add -> @llvm.aarch64.neon.addp.v8i8.
// CHECK-LABEL: @test_vpadd_s8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}
4363
// vpadd_s16 -> @llvm.aarch64.neon.addp.v4i16. The trailing [[VPADD_V3_I]]
// bitcast is captured only to pin the emitted IR shape; the return uses V2.
// CHECK-LABEL: @test_vpadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}
4373
// vpadd_s32 -> @llvm.aarch64.neon.addp.v2i32.
// CHECK-LABEL: @test_vpadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}
4383
// vpadd_u8 shares the signed lowering: @llvm.aarch64.neon.addp.v8i8.
// CHECK-LABEL: @test_vpadd_u8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}
4390
// vpadd_u16 -> @llvm.aarch64.neon.addp.v4i16.
// CHECK-LABEL: @test_vpadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}
4400
// vpadd_u32 -> @llvm.aarch64.neon.addp.v2i32.
// CHECK-LABEL: @test_vpadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}
4410
// vpadd_f32: pairwise float add -> @llvm.aarch64.neon.faddp.v2f32.
// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}
4420
// vpaddq_s8 (AArch64-only 128-bit pairwise add) -> @llvm.aarch64.neon.addp.v16i8.
// CHECK-LABEL: @test_vpaddq_s8(
// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
  return vpaddq_s8(a, b);
}
4427
// vpaddq_s16 -> @llvm.aarch64.neon.addp.v8i16.
// CHECK-LABEL: @test_vpaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  return vpaddq_s16(a, b);
}
4437
// vpaddq_s32 -> @llvm.aarch64.neon.addp.v4i32.
// CHECK-LABEL: @test_vpaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  return vpaddq_s32(a, b);
}
4447
// vpaddq_u8 -> @llvm.aarch64.neon.addp.v16i8.
// CHECK-LABEL: @test_vpaddq_u8(
// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vpaddq_u8(a, b);
}
4454
// vpaddq_u16 -> @llvm.aarch64.neon.addp.v8i16.
// CHECK-LABEL: @test_vpaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vpaddq_u16(a, b);
}
4464
// vpaddq_u32 -> @llvm.aarch64.neon.addp.v4i32.
// CHECK-LABEL: @test_vpaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vpaddq_u32(a, b);
}
4474
// vpaddq_f32 -> @llvm.aarch64.neon.faddp.v4f32.
// CHECK-LABEL: @test_vpaddq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VPADDQ_V2_I]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  return vpaddq_f32(a, b);
}
4484
// vpaddq_f64 -> @llvm.aarch64.neon.faddp.v2f64.
// CHECK-LABEL: @test_vpaddq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VPADDQ_V2_I]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
  return vpaddq_f64(a, b);
}
4494
// vqdmulh_s16 -> @llvm.aarch64.neon.sqdmulh.v4i16.
// CHECK-LABEL: @test_vqdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}
4504
// vqdmulh_s32 -> @llvm.aarch64.neon.sqdmulh.v2i32.
// CHECK-LABEL: @test_vqdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}
4514
// vqdmulhq_s16 -> @llvm.aarch64.neon.sqdmulh.v8i16.
// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}
4524
// vqdmulhq_s32 -> @llvm.aarch64.neon.sqdmulh.v4i32.
// CHECK-LABEL: @test_vqdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}
4534
4535 // CHECK-LABEL: @test_vqrdmulh_s16(
4536 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4537 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4538 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
4539 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
4540 // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
test_vqrdmulh_s16(int16x4_t a,int16x4_t b)4541 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
4542 return vqrdmulh_s16(a, b);
4543 }
4544
4545 // CHECK-LABEL: @test_vqrdmulh_s32(
4546 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4547 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4548 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
4549 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
4550 // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
test_vqrdmulh_s32(int32x2_t a,int32x2_t b)4551 int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
4552 return vqrdmulh_s32(a, b);
4553 }
4554
4555 // CHECK-LABEL: @test_vqrdmulhq_s16(
4556 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4557 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4558 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
4559 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
4560 // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s16(int16x8_t a,int16x8_t b)4561 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
4562 return vqrdmulhq_s16(a, b);
4563 }
4564
4565 // CHECK-LABEL: @test_vqrdmulhq_s32(
4566 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4567 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4568 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
4569 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
4570 // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s32(int32x4_t a,int32x4_t b)4571 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
4572 return vqrdmulhq_s32(a, b);
4573 }
4574
4575 // CHECK-LABEL: @test_vmulx_f32(
4576 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4577 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4578 // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
4579 // CHECK: ret <2 x float> [[VMULX2_I]]
test_vmulx_f32(float32x2_t a,float32x2_t b)4580 float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
4581 return vmulx_f32(a, b);
4582 }
4583
4584 // CHECK-LABEL: @test_vmulxq_f32(
4585 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4586 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4587 // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
4588 // CHECK: ret <4 x float> [[VMULX2_I]]
test_vmulxq_f32(float32x4_t a,float32x4_t b)4589 float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
4590 return vmulxq_f32(a, b);
4591 }
4592
4593 // CHECK-LABEL: @test_vmulxq_f64(
4594 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4595 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4596 // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
4597 // CHECK: ret <2 x double> [[VMULX2_I]]
test_vmulxq_f64(float64x2_t a,float64x2_t b)4598 float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
4599 return vmulxq_f64(a, b);
4600 }
4601
4602 // CHECK-LABEL: @test_vshl_n_s8(
4603 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4604 // CHECK: ret <8 x i8> [[VSHL_N]]
test_vshl_n_s8(int8x8_t a)4605 int8x8_t test_vshl_n_s8(int8x8_t a) {
4606 return vshl_n_s8(a, 3);
4607 }
4608
4609 // CHECK-LABEL: @test_vshl_n_s16(
4610 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4611 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4612 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4613 // CHECK: ret <4 x i16> [[VSHL_N]]
test_vshl_n_s16(int16x4_t a)4614 int16x4_t test_vshl_n_s16(int16x4_t a) {
4615 return vshl_n_s16(a, 3);
4616 }
4617
4618 // CHECK-LABEL: @test_vshl_n_s32(
4619 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4620 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4621 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4622 // CHECK: ret <2 x i32> [[VSHL_N]]
test_vshl_n_s32(int32x2_t a)4623 int32x2_t test_vshl_n_s32(int32x2_t a) {
4624 return vshl_n_s32(a, 3);
4625 }
4626
4627 // CHECK-LABEL: @test_vshlq_n_s8(
4628 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4629 // CHECK: ret <16 x i8> [[VSHL_N]]
test_vshlq_n_s8(int8x16_t a)4630 int8x16_t test_vshlq_n_s8(int8x16_t a) {
4631 return vshlq_n_s8(a, 3);
4632 }
4633
4634 // CHECK-LABEL: @test_vshlq_n_s16(
4635 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4636 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4637 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4638 // CHECK: ret <8 x i16> [[VSHL_N]]
test_vshlq_n_s16(int16x8_t a)4639 int16x8_t test_vshlq_n_s16(int16x8_t a) {
4640 return vshlq_n_s16(a, 3);
4641 }
4642
4643 // CHECK-LABEL: @test_vshlq_n_s32(
4644 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4645 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4646 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4647 // CHECK: ret <4 x i32> [[VSHL_N]]
test_vshlq_n_s32(int32x4_t a)4648 int32x4_t test_vshlq_n_s32(int32x4_t a) {
4649 return vshlq_n_s32(a, 3);
4650 }
4651
4652 // CHECK-LABEL: @test_vshlq_n_s64(
4653 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4654 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4655 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4656 // CHECK: ret <2 x i64> [[VSHL_N]]
test_vshlq_n_s64(int64x2_t a)4657 int64x2_t test_vshlq_n_s64(int64x2_t a) {
4658 return vshlq_n_s64(a, 3);
4659 }
4660
4661 // CHECK-LABEL: @test_vshl_n_u8(
4662 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4663 // CHECK: ret <8 x i8> [[VSHL_N]]
test_vshl_n_u8(int8x8_t a)4664 int8x8_t test_vshl_n_u8(int8x8_t a) {
4665 return vshl_n_u8(a, 3);
4666 }
4667
4668 // CHECK-LABEL: @test_vshl_n_u16(
4669 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4670 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4671 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4672 // CHECK: ret <4 x i16> [[VSHL_N]]
test_vshl_n_u16(int16x4_t a)4673 int16x4_t test_vshl_n_u16(int16x4_t a) {
4674 return vshl_n_u16(a, 3);
4675 }
4676
4677 // CHECK-LABEL: @test_vshl_n_u32(
4678 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4679 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4680 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4681 // CHECK: ret <2 x i32> [[VSHL_N]]
test_vshl_n_u32(int32x2_t a)4682 int32x2_t test_vshl_n_u32(int32x2_t a) {
4683 return vshl_n_u32(a, 3);
4684 }
4685
4686 // CHECK-LABEL: @test_vshlq_n_u8(
4687 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4688 // CHECK: ret <16 x i8> [[VSHL_N]]
test_vshlq_n_u8(int8x16_t a)4689 int8x16_t test_vshlq_n_u8(int8x16_t a) {
4690 return vshlq_n_u8(a, 3);
4691 }
4692
4693 // CHECK-LABEL: @test_vshlq_n_u16(
4694 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4695 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4696 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4697 // CHECK: ret <8 x i16> [[VSHL_N]]
test_vshlq_n_u16(int16x8_t a)4698 int16x8_t test_vshlq_n_u16(int16x8_t a) {
4699 return vshlq_n_u16(a, 3);
4700 }
4701
4702 // CHECK-LABEL: @test_vshlq_n_u32(
4703 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4704 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4705 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4706 // CHECK: ret <4 x i32> [[VSHL_N]]
test_vshlq_n_u32(int32x4_t a)4707 int32x4_t test_vshlq_n_u32(int32x4_t a) {
4708 return vshlq_n_u32(a, 3);
4709 }
4710
4711 // CHECK-LABEL: @test_vshlq_n_u64(
4712 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4713 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4714 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4715 // CHECK: ret <2 x i64> [[VSHL_N]]
test_vshlq_n_u64(int64x2_t a)4716 int64x2_t test_vshlq_n_u64(int64x2_t a) {
4717 return vshlq_n_u64(a, 3);
4718 }
4719
4720 // CHECK-LABEL: @test_vshr_n_s8(
4721 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4722 // CHECK: ret <8 x i8> [[VSHR_N]]
test_vshr_n_s8(int8x8_t a)4723 int8x8_t test_vshr_n_s8(int8x8_t a) {
4724 return vshr_n_s8(a, 3);
4725 }
4726
4727 // CHECK-LABEL: @test_vshr_n_s16(
4728 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4729 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4730 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4731 // CHECK: ret <4 x i16> [[VSHR_N]]
test_vshr_n_s16(int16x4_t a)4732 int16x4_t test_vshr_n_s16(int16x4_t a) {
4733 return vshr_n_s16(a, 3);
4734 }
4735
4736 // CHECK-LABEL: @test_vshr_n_s32(
4737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4738 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4739 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
4740 // CHECK: ret <2 x i32> [[VSHR_N]]
test_vshr_n_s32(int32x2_t a)4741 int32x2_t test_vshr_n_s32(int32x2_t a) {
4742 return vshr_n_s32(a, 3);
4743 }
4744
4745 // CHECK-LABEL: @test_vshrq_n_s8(
4746 // CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4747 // CHECK: ret <16 x i8> [[VSHR_N]]
test_vshrq_n_s8(int8x16_t a)4748 int8x16_t test_vshrq_n_s8(int8x16_t a) {
4749 return vshrq_n_s8(a, 3);
4750 }
4751
4752 // CHECK-LABEL: @test_vshrq_n_s16(
4753 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4754 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4755 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4756 // CHECK: ret <8 x i16> [[VSHR_N]]
test_vshrq_n_s16(int16x8_t a)4757 int16x8_t test_vshrq_n_s16(int16x8_t a) {
4758 return vshrq_n_s16(a, 3);
4759 }
4760
4761 // CHECK-LABEL: @test_vshrq_n_s32(
4762 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4763 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4764 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4765 // CHECK: ret <4 x i32> [[VSHR_N]]
test_vshrq_n_s32(int32x4_t a)4766 int32x4_t test_vshrq_n_s32(int32x4_t a) {
4767 return vshrq_n_s32(a, 3);
4768 }
4769
4770 // CHECK-LABEL: @test_vshrq_n_s64(
4771 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4772 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4773 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
4774 // CHECK: ret <2 x i64> [[VSHR_N]]
test_vshrq_n_s64(int64x2_t a)4775 int64x2_t test_vshrq_n_s64(int64x2_t a) {
4776 return vshrq_n_s64(a, 3);
4777 }
4778
4779 // CHECK-LABEL: @test_vshr_n_u8(
4780 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4781 // CHECK: ret <8 x i8> [[VSHR_N]]
test_vshr_n_u8(int8x8_t a)4782 int8x8_t test_vshr_n_u8(int8x8_t a) {
4783 return vshr_n_u8(a, 3);
4784 }
4785
4786 // CHECK-LABEL: @test_vshr_n_u16(
4787 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4788 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4789 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4790 // CHECK: ret <4 x i16> [[VSHR_N]]
test_vshr_n_u16(int16x4_t a)4791 int16x4_t test_vshr_n_u16(int16x4_t a) {
4792 return vshr_n_u16(a, 3);
4793 }
4794
4795 // CHECK-LABEL: @test_vshr_n_u32(
4796 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4797 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4798 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
4799 // CHECK: ret <2 x i32> [[VSHR_N]]
test_vshr_n_u32(int32x2_t a)4800 int32x2_t test_vshr_n_u32(int32x2_t a) {
4801 return vshr_n_u32(a, 3);
4802 }
4803
4804 // CHECK-LABEL: @test_vshrq_n_u8(
4805 // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4806 // CHECK: ret <16 x i8> [[VSHR_N]]
test_vshrq_n_u8(int8x16_t a)4807 int8x16_t test_vshrq_n_u8(int8x16_t a) {
4808 return vshrq_n_u8(a, 3);
4809 }
4810
4811 // CHECK-LABEL: @test_vshrq_n_u16(
4812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4813 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4814 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4815 // CHECK: ret <8 x i16> [[VSHR_N]]
test_vshrq_n_u16(int16x8_t a)4816 int16x8_t test_vshrq_n_u16(int16x8_t a) {
4817 return vshrq_n_u16(a, 3);
4818 }
4819
4820 // CHECK-LABEL: @test_vshrq_n_u32(
4821 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4822 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4823 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4824 // CHECK: ret <4 x i32> [[VSHR_N]]
test_vshrq_n_u32(int32x4_t a)4825 int32x4_t test_vshrq_n_u32(int32x4_t a) {
4826 return vshrq_n_u32(a, 3);
4827 }
4828
4829 // CHECK-LABEL: @test_vshrq_n_u64(
4830 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4831 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4832 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
4833 // CHECK: ret <2 x i64> [[VSHR_N]]
test_vshrq_n_u64(int64x2_t a)4834 int64x2_t test_vshrq_n_u64(int64x2_t a) {
4835 return vshrq_n_u64(a, 3);
4836 }
4837
4838 // CHECK-LABEL: @test_vsra_n_s8(
4839 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4840 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4841 // CHECK: ret <8 x i8> [[TMP0]]
test_vsra_n_s8(int8x8_t a,int8x8_t b)4842 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
4843 return vsra_n_s8(a, b, 3);
4844 }
4845
4846 // CHECK-LABEL: @test_vsra_n_s16(
4847 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4848 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4849 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4850 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4851 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4852 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4853 // CHECK: ret <4 x i16> [[TMP4]]
test_vsra_n_s16(int16x4_t a,int16x4_t b)4854 int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
4855 return vsra_n_s16(a, b, 3);
4856 }
4857
4858 // CHECK-LABEL: @test_vsra_n_s32(
4859 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4860 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4861 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4862 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4863 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
4864 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4865 // CHECK: ret <2 x i32> [[TMP4]]
test_vsra_n_s32(int32x2_t a,int32x2_t b)4866 int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
4867 return vsra_n_s32(a, b, 3);
4868 }
4869
4870 // CHECK-LABEL: @test_vsraq_n_s8(
4871 // CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4872 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4873 // CHECK: ret <16 x i8> [[TMP0]]
test_vsraq_n_s8(int8x16_t a,int8x16_t b)4874 int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
4875 return vsraq_n_s8(a, b, 3);
4876 }
4877
4878 // CHECK-LABEL: @test_vsraq_n_s16(
4879 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4880 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4881 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4882 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4883 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4884 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4885 // CHECK: ret <8 x i16> [[TMP4]]
test_vsraq_n_s16(int16x8_t a,int16x8_t b)4886 int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
4887 return vsraq_n_s16(a, b, 3);
4888 }
4889
4890 // CHECK-LABEL: @test_vsraq_n_s32(
4891 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4892 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4893 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4894 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4895 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4896 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4897 // CHECK: ret <4 x i32> [[TMP4]]
test_vsraq_n_s32(int32x4_t a,int32x4_t b)4898 int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
4899 return vsraq_n_s32(a, b, 3);
4900 }
4901
4902 // CHECK-LABEL: @test_vsraq_n_s64(
4903 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4904 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4905 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4906 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4907 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
4908 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4909 // CHECK: ret <2 x i64> [[TMP4]]
test_vsraq_n_s64(int64x2_t a,int64x2_t b)4910 int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
4911 return vsraq_n_s64(a, b, 3);
4912 }
4913
4914 // CHECK-LABEL: @test_vsra_n_u8(
4915 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4916 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4917 // CHECK: ret <8 x i8> [[TMP0]]
test_vsra_n_u8(int8x8_t a,int8x8_t b)4918 int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
4919 return vsra_n_u8(a, b, 3);
4920 }
4921
4922 // CHECK-LABEL: @test_vsra_n_u16(
4923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4925 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4926 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4927 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4928 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4929 // CHECK: ret <4 x i16> [[TMP4]]
test_vsra_n_u16(int16x4_t a,int16x4_t b)4930 int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
4931 return vsra_n_u16(a, b, 3);
4932 }
4933
4934 // CHECK-LABEL: @test_vsra_n_u32(
4935 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4936 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4937 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4938 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4939 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
4940 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4941 // CHECK: ret <2 x i32> [[TMP4]]
test_vsra_n_u32(int32x2_t a,int32x2_t b)4942 int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
4943 return vsra_n_u32(a, b, 3);
4944 }
4945
4946 // CHECK-LABEL: @test_vsraq_n_u8(
4947 // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4948 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4949 // CHECK: ret <16 x i8> [[TMP0]]
test_vsraq_n_u8(int8x16_t a,int8x16_t b)4950 int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
4951 return vsraq_n_u8(a, b, 3);
4952 }
4953
4954 // CHECK-LABEL: @test_vsraq_n_u16(
4955 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4956 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4957 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4958 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4959 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4960 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4961 // CHECK: ret <8 x i16> [[TMP4]]
test_vsraq_n_u16(int16x8_t a,int16x8_t b)4962 int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
4963 return vsraq_n_u16(a, b, 3);
4964 }
4965
4966 // CHECK-LABEL: @test_vsraq_n_u32(
4967 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4968 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4969 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4970 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4971 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4972 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4973 // CHECK: ret <4 x i32> [[TMP4]]
test_vsraq_n_u32(int32x4_t a,int32x4_t b)4974 int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
4975 return vsraq_n_u32(a, b, 3);
4976 }
4977
4978 // CHECK-LABEL: @test_vsraq_n_u64(
4979 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4980 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4981 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4982 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4983 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
4984 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4985 // CHECK: ret <2 x i64> [[TMP4]]
test_vsraq_n_u64(int64x2_t a,int64x2_t b)4986 int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
4987 return vsraq_n_u64(a, b, 3);
4988 }
4989
4990 // CHECK-LABEL: @test_vrshr_n_s8(
4991 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
4992 // CHECK: ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_s8(int8x8_t a)4993 int8x8_t test_vrshr_n_s8(int8x8_t a) {
4994 return vrshr_n_s8(a, 3);
4995 }
4996
4997 // CHECK-LABEL: @test_vrshr_n_s16(
4998 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4999 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5000 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5001 // CHECK: ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_s16(int16x4_t a)5002 int16x4_t test_vrshr_n_s16(int16x4_t a) {
5003 return vrshr_n_s16(a, 3);
5004 }
5005
5006 // CHECK-LABEL: @test_vrshr_n_s32(
5007 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5008 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5009 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5010 // CHECK: ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_s32(int32x2_t a)5011 int32x2_t test_vrshr_n_s32(int32x2_t a) {
5012 return vrshr_n_s32(a, 3);
5013 }
5014
5015 // CHECK-LABEL: @test_vrshrq_n_s8(
5016 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5017 // CHECK: ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_s8(int8x16_t a)5018 int8x16_t test_vrshrq_n_s8(int8x16_t a) {
5019 return vrshrq_n_s8(a, 3);
5020 }
5021
5022 // CHECK-LABEL: @test_vrshrq_n_s16(
5023 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5024 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5025 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5026 // CHECK: ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_s16(int16x8_t a)5027 int16x8_t test_vrshrq_n_s16(int16x8_t a) {
5028 return vrshrq_n_s16(a, 3);
5029 }
5030
5031 // CHECK-LABEL: @test_vrshrq_n_s32(
5032 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5033 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5034 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5035 // CHECK: ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_s32(int32x4_t a)5036 int32x4_t test_vrshrq_n_s32(int32x4_t a) {
5037 return vrshrq_n_s32(a, 3);
5038 }
5039
5040 // CHECK-LABEL: @test_vrshrq_n_s64(
5041 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5042 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5043 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5044 // CHECK: ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_s64(int64x2_t a)5045 int64x2_t test_vrshrq_n_s64(int64x2_t a) {
5046 return vrshrq_n_s64(a, 3);
5047 }
5048
5049 // CHECK-LABEL: @test_vrshr_n_u8(
5050 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5051 // CHECK: ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_u8(int8x8_t a)5052 int8x8_t test_vrshr_n_u8(int8x8_t a) {
5053 return vrshr_n_u8(a, 3);
5054 }
5055
5056 // CHECK-LABEL: @test_vrshr_n_u16(
5057 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5058 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5059 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5060 // CHECK: ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_u16(int16x4_t a)5061 int16x4_t test_vrshr_n_u16(int16x4_t a) {
5062 return vrshr_n_u16(a, 3);
5063 }
5064
5065 // CHECK-LABEL: @test_vrshr_n_u32(
5066 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5067 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5068 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5069 // CHECK: ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_u32(int32x2_t a)5070 int32x2_t test_vrshr_n_u32(int32x2_t a) {
5071 return vrshr_n_u32(a, 3);
5072 }
5073
5074 // CHECK-LABEL: @test_vrshrq_n_u8(
5075 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5076 // CHECK: ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_u8(int8x16_t a)5077 int8x16_t test_vrshrq_n_u8(int8x16_t a) {
5078 return vrshrq_n_u8(a, 3);
5079 }
5080
5081 // CHECK-LABEL: @test_vrshrq_n_u16(
5082 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5083 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5084 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5085 // CHECK: ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_u16(int16x8_t a)5086 int16x8_t test_vrshrq_n_u16(int16x8_t a) {
5087 return vrshrq_n_u16(a, 3);
5088 }
5089
5090 // CHECK-LABEL: @test_vrshrq_n_u32(
5091 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5092 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5093 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5094 // CHECK: ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_u32(int32x4_t a)5095 int32x4_t test_vrshrq_n_u32(int32x4_t a) {
5096 return vrshrq_n_u32(a, 3);
5097 }
5098
5099 // CHECK-LABEL: @test_vrshrq_n_u64(
5100 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5101 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5102 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5103 // CHECK: ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_u64(int64x2_t a)5104 int64x2_t test_vrshrq_n_u64(int64x2_t a) {
5105 return vrshrq_n_u64(a, 3);
5106 }
5107
5108 // CHECK-LABEL: @test_vrsra_n_s8(
5109 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5110 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5111 // CHECK: ret <8 x i8> [[TMP0]]
test_vrsra_n_s8(int8x8_t a,int8x8_t b)5112 int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
5113 return vrsra_n_s8(a, b, 3);
5114 }
5115
5116 // CHECK-LABEL: @test_vrsra_n_s16(
5117 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5118 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5119 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5120 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5121 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5122 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5123 // CHECK: ret <4 x i16> [[TMP3]]
test_vrsra_n_s16(int16x4_t a,int16x4_t b)5124 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
5125 return vrsra_n_s16(a, b, 3);
5126 }
5127
5128 // CHECK-LABEL: @test_vrsra_n_s32(
5129 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5130 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5131 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5132 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5133 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5134 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5135 // CHECK: ret <2 x i32> [[TMP3]]
test_vrsra_n_s32(int32x2_t a,int32x2_t b)5136 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
5137 return vrsra_n_s32(a, b, 3);
5138 }
5139
5140 // CHECK-LABEL: @test_vrsraq_n_s8(
5141 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5142 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5143 // CHECK: ret <16 x i8> [[TMP0]]
test_vrsraq_n_s8(int8x16_t a,int8x16_t b)5144 int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
5145 return vrsraq_n_s8(a, b, 3);
5146 }
5147
5148 // CHECK-LABEL: @test_vrsraq_n_s16(
5149 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5150 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5151 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5152 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5153 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5154 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5155 // CHECK: ret <8 x i16> [[TMP3]]
// CodeGen test: vrsraq_n_s16; expected IR in CHECK lines above.
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}
5159
5160 // CHECK-LABEL: @test_vrsraq_n_s32(
5161 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5162 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5163 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5165 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5166 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5167 // CHECK: ret <4 x i32> [[TMP3]]
// CodeGen test: vrsraq_n_s32; expected IR in CHECK lines above.
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}
5171
5172 // CHECK-LABEL: @test_vrsraq_n_s64(
5173 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5174 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5175 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5176 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5177 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5178 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5179 // CHECK: ret <2 x i64> [[TMP3]]
// CodeGen test: vrsraq_n_s64; expected IR in CHECK lines above.
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}
5183
5184 // CHECK-LABEL: @test_vrsra_n_u8(
5185 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5186 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5187 // CHECK: ret <8 x i8> [[TMP0]]
test_vrsra_n_u8(int8x8_t a,int8x8_t b)5188 int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
5189 return vrsra_n_u8(a, b, 3);
5190 }
5191
5192 // CHECK-LABEL: @test_vrsra_n_u16(
5193 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5194 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5195 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5196 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5197 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5198 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5199 // CHECK: ret <4 x i16> [[TMP3]]
test_vrsra_n_u16(int16x4_t a,int16x4_t b)5200 int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
5201 return vrsra_n_u16(a, b, 3);
5202 }
5203
5204 // CHECK-LABEL: @test_vrsra_n_u32(
5205 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5206 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5207 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5208 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5209 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5210 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5211 // CHECK: ret <2 x i32> [[TMP3]]
test_vrsra_n_u32(int32x2_t a,int32x2_t b)5212 int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
5213 return vrsra_n_u32(a, b, 3);
5214 }
5215
5216 // CHECK-LABEL: @test_vrsraq_n_u8(
5217 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5218 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5219 // CHECK: ret <16 x i8> [[TMP0]]
test_vrsraq_n_u8(int8x16_t a,int8x16_t b)5220 int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
5221 return vrsraq_n_u8(a, b, 3);
5222 }
5223
5224 // CHECK-LABEL: @test_vrsraq_n_u16(
5225 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5226 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5227 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5228 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5229 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5230 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5231 // CHECK: ret <8 x i16> [[TMP3]]
test_vrsraq_n_u16(int16x8_t a,int16x8_t b)5232 int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
5233 return vrsraq_n_u16(a, b, 3);
5234 }
5235
5236 // CHECK-LABEL: @test_vrsraq_n_u32(
5237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5238 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5239 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5240 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5241 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5242 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5243 // CHECK: ret <4 x i32> [[TMP3]]
test_vrsraq_n_u32(int32x4_t a,int32x4_t b)5244 int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
5245 return vrsraq_n_u32(a, b, 3);
5246 }
5247
5248 // CHECK-LABEL: @test_vrsraq_n_u64(
5249 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5250 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5251 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5252 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5253 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5254 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5255 // CHECK: ret <2 x i64> [[TMP3]]
test_vrsraq_n_u64(int64x2_t a,int64x2_t b)5256 int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
5257 return vrsraq_n_u64(a, b, 3);
5258 }
5259
5260 // CHECK-LABEL: @test_vsri_n_s8(
5261 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5262 // CHECK: ret <8 x i8> [[VSRI_N]]
// CodeGen test: vsri_n_s8 (shift right and insert); expected IR in CHECK lines above.
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}
5266
5267 // CHECK-LABEL: @test_vsri_n_s16(
5268 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5269 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5270 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5271 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5272 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5273 // CHECK: ret <4 x i16> [[VSRI_N2]]
// CodeGen test: vsri_n_s16; expected IR in CHECK lines above.
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}
5277
5278 // CHECK-LABEL: @test_vsri_n_s32(
5279 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5280 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5281 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5282 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5283 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5284 // CHECK: ret <2 x i32> [[VSRI_N2]]
// CodeGen test: vsri_n_s32; expected IR in CHECK lines above.
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}
5288
5289 // CHECK-LABEL: @test_vsriq_n_s8(
5290 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5291 // CHECK: ret <16 x i8> [[VSRI_N]]
// CodeGen test: vsriq_n_s8 (128-bit variant); expected IR in CHECK lines above.
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}
5295
5296 // CHECK-LABEL: @test_vsriq_n_s16(
5297 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5298 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5299 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5300 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5301 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5302 // CHECK: ret <8 x i16> [[VSRI_N2]]
// CodeGen test: vsriq_n_s16; expected IR in CHECK lines above.
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}
5306
5307 // CHECK-LABEL: @test_vsriq_n_s32(
5308 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5309 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5310 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5311 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5312 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5313 // CHECK: ret <4 x i32> [[VSRI_N2]]
// CodeGen test: vsriq_n_s32; expected IR in CHECK lines above.
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}
5317
5318 // CHECK-LABEL: @test_vsriq_n_s64(
5319 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5320 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5321 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5322 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5323 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5324 // CHECK: ret <2 x i64> [[VSRI_N2]]
// CodeGen test: vsriq_n_s64; expected IR in CHECK lines above.
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}
5328
5329 // CHECK-LABEL: @test_vsri_n_u8(
5330 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5331 // CHECK: ret <8 x i8> [[VSRI_N]]
test_vsri_n_u8(int8x8_t a,int8x8_t b)5332 int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) {
5333 return vsri_n_u8(a, b, 3);
5334 }
5335
5336 // CHECK-LABEL: @test_vsri_n_u16(
5337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5339 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5340 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5341 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5342 // CHECK: ret <4 x i16> [[VSRI_N2]]
test_vsri_n_u16(int16x4_t a,int16x4_t b)5343 int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
5344 return vsri_n_u16(a, b, 3);
5345 }
5346
5347 // CHECK-LABEL: @test_vsri_n_u32(
5348 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5350 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5351 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5352 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5353 // CHECK: ret <2 x i32> [[VSRI_N2]]
test_vsri_n_u32(int32x2_t a,int32x2_t b)5354 int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
5355 return vsri_n_u32(a, b, 3);
5356 }
5357
5358 // CHECK-LABEL: @test_vsriq_n_u8(
5359 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5360 // CHECK: ret <16 x i8> [[VSRI_N]]
test_vsriq_n_u8(int8x16_t a,int8x16_t b)5361 int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) {
5362 return vsriq_n_u8(a, b, 3);
5363 }
5364
5365 // CHECK-LABEL: @test_vsriq_n_u16(
5366 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5367 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5368 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5369 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5370 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5371 // CHECK: ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_u16(int16x8_t a,int16x8_t b)5372 int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
5373 return vsriq_n_u16(a, b, 3);
5374 }
5375
5376 // CHECK-LABEL: @test_vsriq_n_u32(
5377 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5378 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5379 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5380 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5381 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5382 // CHECK: ret <4 x i32> [[VSRI_N2]]
test_vsriq_n_u32(int32x4_t a,int32x4_t b)5383 int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
5384 return vsriq_n_u32(a, b, 3);
5385 }
5386
5387 // CHECK-LABEL: @test_vsriq_n_u64(
5388 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5389 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5390 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5391 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5392 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5393 // CHECK: ret <2 x i64> [[VSRI_N2]]
test_vsriq_n_u64(int64x2_t a,int64x2_t b)5394 int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
5395 return vsriq_n_u64(a, b, 3);
5396 }
5397
5398 // CHECK-LABEL: @test_vsri_n_p8(
5399 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5400 // CHECK: ret <8 x i8> [[VSRI_N]]
// CodeGen test: vsri_n_p8 (polynomial variant); expected IR in CHECK lines above.
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}
5404
5405 // CHECK-LABEL: @test_vsri_n_p16(
5406 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5407 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5408 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5409 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5410 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
5411 // CHECK: ret <4 x i16> [[VSRI_N2]]
// CodeGen test: vsri_n_p16 with the maximum shift count (15 for 16-bit lanes).
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}
5415
5416 // CHECK-LABEL: @test_vsriq_n_p8(
5417 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5418 // CHECK: ret <16 x i8> [[VSRI_N]]
// CodeGen test: vsriq_n_p8; expected IR in CHECK lines above.
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}
5422
5423 // CHECK-LABEL: @test_vsriq_n_p16(
5424 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5425 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5426 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5427 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5428 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
5429 // CHECK: ret <8 x i16> [[VSRI_N2]]
// CodeGen test: vsriq_n_p16 with the maximum shift count (15 for 16-bit lanes).
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}
5433
5434 // CHECK-LABEL: @test_vsli_n_s8(
5435 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5436 // CHECK: ret <8 x i8> [[VSLI_N]]
// CodeGen test: vsli_n_s8 (shift left and insert); expected IR in CHECK lines above.
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}
5440
5441 // CHECK-LABEL: @test_vsli_n_s16(
5442 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5444 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5445 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5446 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5447 // CHECK: ret <4 x i16> [[VSLI_N2]]
// CodeGen test: vsli_n_s16; expected IR in CHECK lines above.
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}
5451
5452 // CHECK-LABEL: @test_vsli_n_s32(
5453 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5454 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5455 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5456 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5457 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5458 // CHECK: ret <2 x i32> [[VSLI_N2]]
// CodeGen test: vsli_n_s32; expected IR in CHECK lines above.
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}
5462
5463 // CHECK-LABEL: @test_vsliq_n_s8(
5464 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5465 // CHECK: ret <16 x i8> [[VSLI_N]]
// CodeGen test: vsliq_n_s8 (128-bit variant); expected IR in CHECK lines above.
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}
5469
5470 // CHECK-LABEL: @test_vsliq_n_s16(
5471 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5472 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5473 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5474 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5475 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5476 // CHECK: ret <8 x i16> [[VSLI_N2]]
// CodeGen test: vsliq_n_s16; expected IR in CHECK lines above.
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}
5480
5481 // CHECK-LABEL: @test_vsliq_n_s32(
5482 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5483 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5484 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5485 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5486 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5487 // CHECK: ret <4 x i32> [[VSLI_N2]]
// CodeGen test: vsliq_n_s32; expected IR in CHECK lines above.
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}
5491
5492 // CHECK-LABEL: @test_vsliq_n_s64(
5493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5494 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5495 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5496 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5497 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5498 // CHECK: ret <2 x i64> [[VSLI_N2]]
// CodeGen test: vsliq_n_s64; expected IR in CHECK lines above.
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}
5502
5503 // CHECK-LABEL: @test_vsli_n_u8(
5504 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5505 // CHECK: ret <8 x i8> [[VSLI_N]]
// CodeGen test: vsli_n_u8 (unsigned variant, correctly typed uint8x8_t).
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}
5509
5510 // CHECK-LABEL: @test_vsli_n_u16(
5511 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5512 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5513 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5514 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5515 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5516 // CHECK: ret <4 x i16> [[VSLI_N2]]
// CodeGen test: vsli_n_u16; expected IR in CHECK lines above.
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}
5520
5521 // CHECK-LABEL: @test_vsli_n_u32(
5522 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5523 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5524 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5525 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5526 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5527 // CHECK: ret <2 x i32> [[VSLI_N2]]
// CodeGen test: vsli_n_u32; expected IR in CHECK lines above.
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}
5531
5532 // CHECK-LABEL: @test_vsliq_n_u8(
5533 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5534 // CHECK: ret <16 x i8> [[VSLI_N]]
// CodeGen test: vsliq_n_u8 (128-bit variant); expected IR in CHECK lines above.
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}
5538
5539 // CHECK-LABEL: @test_vsliq_n_u16(
5540 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5541 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5542 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5543 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5544 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5545 // CHECK: ret <8 x i16> [[VSLI_N2]]
// CodeGen test: vsliq_n_u16; expected IR in CHECK lines above.
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}
5549
5550 // CHECK-LABEL: @test_vsliq_n_u32(
5551 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5552 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5553 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5554 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5555 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5556 // CHECK: ret <4 x i32> [[VSLI_N2]]
// CodeGen test: vsliq_n_u32; expected IR in CHECK lines above.
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}
5560
5561 // CHECK-LABEL: @test_vsliq_n_u64(
5562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5564 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5565 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5566 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5567 // CHECK: ret <2 x i64> [[VSLI_N2]]
// CodeGen test: vsliq_n_u64; expected IR in CHECK lines above.
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}
5571
5572 // CHECK-LABEL: @test_vsli_n_p8(
5573 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5574 // CHECK: ret <8 x i8> [[VSLI_N]]
// CodeGen test: vsli_n_p8 (polynomial variant); expected IR in CHECK lines above.
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}
5578
5579 // CHECK-LABEL: @test_vsli_n_p16(
5580 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5581 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5582 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5583 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5584 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
5585 // CHECK: ret <4 x i16> [[VSLI_N2]]
// CodeGen test: vsli_n_p16 with the maximum shift count (15 for 16-bit lanes).
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}
5589
5590 // CHECK-LABEL: @test_vsliq_n_p8(
5591 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5592 // CHECK: ret <16 x i8> [[VSLI_N]]
// CodeGen test: vsliq_n_p8; expected IR in CHECK lines above.
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}
5596
5597 // CHECK-LABEL: @test_vsliq_n_p16(
5598 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5599 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5600 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5601 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5602 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
5603 // CHECK: ret <8 x i16> [[VSLI_N2]]
// CodeGen test: vsliq_n_p16 with the maximum shift count (15 for 16-bit lanes).
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
5607
5608 // CHECK-LABEL: @test_vqshlu_n_s8(
5609 // CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
5610 // CHECK: ret <8 x i8> [[VQSHLU_N]]
// CodeGen test: vqshlu_n_s8 (signed saturating shift left, unsigned result); expected IR above.
int8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}
5614
5615 // CHECK-LABEL: @test_vqshlu_n_s16(
5616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5617 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5618 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
5619 // CHECK: ret <4 x i16> [[VQSHLU_N1]]
// CodeGen test: vqshlu_n_s16; expected IR in CHECK lines above.
int16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}
5623
5624 // CHECK-LABEL: @test_vqshlu_n_s32(
5625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5626 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5627 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
5628 // CHECK: ret <2 x i32> [[VQSHLU_N1]]
// CodeGen test: vqshlu_n_s32; expected IR in CHECK lines above.
int32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}
5632
5633 // CHECK-LABEL: @test_vqshluq_n_s8(
5634 // CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
5635 // CHECK: ret <16 x i8> [[VQSHLU_N]]
// CodeGen test: vqshluq_n_s8 (128-bit variant); expected IR in CHECK lines above.
int8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}
5639
5640 // CHECK-LABEL: @test_vqshluq_n_s16(
5641 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5642 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5643 // CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
5644 // CHECK: ret <8 x i16> [[VQSHLU_N1]]
// CodeGen test: vqshluq_n_s16; expected IR in CHECK lines above.
int16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}
5648
5649 // CHECK-LABEL: @test_vqshluq_n_s32(
5650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5651 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5652 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
5653 // CHECK: ret <4 x i32> [[VQSHLU_N1]]
// CodeGen test: vqshluq_n_s32; expected IR in CHECK lines above.
int32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}
5657
5658 // CHECK-LABEL: @test_vqshluq_n_s64(
5659 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5660 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5661 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
5662 // CHECK: ret <2 x i64> [[VQSHLU_N1]]
// CodeGen test: vqshluq_n_s64; expected IR in CHECK lines above.
int64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}
5666
5667 // CHECK-LABEL: @test_vshrn_n_s16(
5668 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5669 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5670 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5671 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5672 // CHECK: ret <8 x i8> [[VSHRN_N]]
// CodeGen test: vshrn_n_s16 (narrowing shift right: ashr + trunc per CHECK lines above).
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}
5676
5677 // CHECK-LABEL: @test_vshrn_n_s32(
5678 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5679 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5680 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5681 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5682 // CHECK: ret <4 x i16> [[VSHRN_N]]
// CodeGen test: vshrn_n_s32 with shift count 9; expected IR in CHECK lines above.
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}
5686
5687 // CHECK-LABEL: @test_vshrn_n_s64(
5688 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5689 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5690 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
5691 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5692 // CHECK: ret <2 x i32> [[VSHRN_N]]
// CodeGen test: vshrn_n_s64 with shift count 19; expected IR in CHECK lines above.
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}
5696
5697 // CHECK-LABEL: @test_vshrn_n_u16(
5698 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5699 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5700 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5701 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5702 // CHECK: ret <8 x i8> [[VSHRN_N]]
// CodeGen test: vshrn_n_u16 (unsigned narrowing shift: lshr + trunc per CHECK lines above).
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}
5706
5707 // CHECK-LABEL: @test_vshrn_n_u32(
5708 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5709 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5710 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5711 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5712 // CHECK: ret <4 x i16> [[VSHRN_N]]
// CodeGen test: vshrn_n_u32 with shift count 9; expected IR in CHECK lines above.
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}
5716
5717 // CHECK-LABEL: @test_vshrn_n_u64(
5718 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5719 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5720 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
5721 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5722 // CHECK: ret <2 x i32> [[VSHRN_N]]
// CodeGen test: vshrn_n_u64 with shift count 19; expected IR in CHECK lines above.
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}
5726
// vshrn_high_n: narrow the high-half argument (ashr for signed, lshr for
// unsigned, then trunc) and concatenate onto the low half via shufflevector.
// CHECK-LABEL: @test_vshrn_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vshrn_high_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
5792
// vqshrun_n / vqshrun_high_n: signed-to-unsigned saturating narrowing shift,
// lowered to the llvm.aarch64.neon.sqshrun intrinsic (plus a concatenating
// shufflevector for the _high variants).
// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
int8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
int16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
int32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrun_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrun_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrun_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
5849
// vrshrn_n: rounding narrowing shift right; both signed and unsigned variants
// lower to the same llvm.aarch64.neon.rshrn intrinsic.
// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}
5903
// vrshrn_high_n: rounding narrow of the high-half argument via
// llvm.aarch64.neon.rshrn, concatenated onto the low half by shufflevector.
// CHECK-LABEL: @test_vrshrn_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vrshrn_high_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}
5963
// vqrshrun_n / vqrshrun_high_n: signed-to-unsigned saturating rounding
// narrowing shift, lowered to llvm.aarch64.neon.sqrshrun (plus shufflevector
// concatenation for the _high variants).
// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}
6020
// vqshrn_n: saturating narrowing shift right; signed variants lower to
// llvm.aarch64.neon.sqshrn, unsigned variants to llvm.aarch64.neon.uqshrn.
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}
6074
// vqshrn_high_n: saturating narrow of the high-half argument (sqshrn/uqshrn),
// concatenated onto the low half by shufflevector.
// CHECK-LABEL: @test_vqshrn_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqshrn_high_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}
6134
// vqrshrn_n: saturating rounding narrowing shift right; signed variants lower
// to llvm.aarch64.neon.sqrshrn, unsigned variants to llvm.aarch64.neon.uqrshrn.
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}
6188
// vqrshrn_high_n: saturating rounding narrow of the high-half argument
// (sqrshrn/uqrshrn), concatenated onto the low half by shufflevector.
// CHECK-LABEL: @test_vqrshrn_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}
6248
// vshll_n: widening shift left by immediate; lowers to sext (signed) or
// zext (unsigned) of the source followed by a vector shl.
// CHECK-LABEL: @test_vshll_n_s8(
// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}
6304
// vshll_high_n family: same extend+shift lowering as vshll_n, but operating
// on the high half of a 128-bit input, extracted first via shufflevector.
// CHECK-LABEL: @test_vshll_high_n_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 19);
}

// CHECK-LABEL: @test_vshll_high_n_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 19);
}
6366
// vmovl family: lengthening move — each lane is sign- (s*) or zero- (u*)
// extended to double width; lowered to a plain sext/zext.
// CHECK-LABEL: @test_vmovl_s8(
// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: @test_vmovl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: @test_vmovl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: @test_vmovl_u8(
// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: @test_vmovl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: @test_vmovl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}
6412
// vmovl_high family: lengthening move of the high half — extract the upper
// lanes with shufflevector, then sext/zext to double width.
// CHECK-LABEL: @test_vmovl_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
  return vmovl_high_s8(a);
}

// CHECK-LABEL: @test_vmovl_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
  return vmovl_high_s16(a);
}

// CHECK-LABEL: @test_vmovl_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP1]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
  return vmovl_high_s32(a);
}

// CHECK-LABEL: @test_vmovl_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
  return vmovl_high_u8(a);
}

// CHECK-LABEL: @test_vmovl_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
  return vmovl_high_u16(a);
}

// CHECK-LABEL: @test_vmovl_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP1]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
  return vmovl_high_u32(a);
}
6464
// vcvt_n (fixed-point integer -> float) family: lowered to the
// vcvtfxs2fp / vcvtfxu2fp intrinsics with the fractional-bit count as the
// immediate (31 for 32-bit lanes, 50 for 64-bit lanes here).
// CHECK-LABEL: @test_vcvt_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
  return vcvtq_n_f64_s64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
  return vcvtq_n_f64_u64(a, 50);
}
6518
// vcvt_n (float -> fixed-point integer) family: lowered to the
// vcvtfp2fxs / vcvtfp2fxu intrinsics with the fractional-bit count as the
// immediate.
// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}
6572
// vaddl family: long add — both operands are sext/zext'ed to double width,
// then added; the expectations pin the extend + add lowering.
// CHECK-LABEL: @test_vaddl_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}
6634
// vaddl_high family: long add of the high halves — each 128-bit operand's
// upper lanes are extracted via shufflevector, extended, then added.
// CHECK-LABEL: @test_vaddl_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vaddl_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vaddl_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vaddl_high_u32(a, b);
}
6708
// vaddw family: wide add — only the second (narrow) operand is extended
// before the add; the first operand is already double width.
// CHECK-LABEL: @test_vaddw_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}
6760
// vaddw_high family: wide add using the high half of the narrow operand —
// upper lanes of %b are extracted via shufflevector, extended, then added.
// CHECK-LABEL: @test_vaddw_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}
6818
// vsubl family: long subtract — both operands are sext/zext'ed to double
// width, then subtracted (mirror of the vaddl expectations above).
// CHECK-LABEL: @test_vsubl_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}
6880
// vsubl_high_*: like vsubl, but operating on the HIGH half of each
// 128-bit input — a shufflevector first extracts the upper lanes, which
// are then widened (sext/zext) and subtracted at double width.
// NOTE(review): the [[TMP*]] bitcast bindings in the 16/32-bit variants
// are matched but unreferenced; the shuffle/ext/sub/ret chain is the
// behavior under test.

// CHECK-LABEL: @test_vsubl_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}
6954
// vsubw_*: NEON "subtract wide" — only the second (narrow) operand is
// widened (sext/zext); the first operand is already at the wide width,
// so the sub takes %a directly.

// CHECK-LABEL: @test_vsubw_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}
7006
// vsubw_high_*: "subtract wide, high half" — the upper lanes of the
// narrow 128-bit operand %b are extracted with a shufflevector, widened
// (sext/zext), and subtracted from the already-wide %a.

// CHECK-LABEL: @test_vsubw_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}
7064
// vaddhn_*: "add, halving narrow" — the sum is shifted right by half
// the lane width (lshr 8/16/32) and truncated, i.e. the high half of
// each wide sum becomes one narrow result lane. Signed and unsigned
// variants lower to the same add/lshr/trunc IR.
// NOTE(review): the [[TMP*]] bitcasts are bound but unused by later
// CHECK lines.

// CHECK-LABEL: @test_vaddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: @test_vaddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: @test_vaddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: @test_vaddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: @test_vaddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: @test_vaddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}
7130
// vaddhn_high_*: same add/lshr/trunc narrowing as vaddhn, then the
// narrow result is concatenated onto %r (the existing low half) via a
// full-width shufflevector, producing a 128-bit vector.

// CHECK-LABEL: @test_vaddhn_high_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}
7202
// vraddhn_*: rounding variant of vaddhn — no open-coded IR here; it
// lowers to the target intrinsic llvm.aarch64.neon.raddhn.* instead.
// The extra [[VRADDHN_V3_I]] bitcast in the 32/64-bit variants is bound
// but not referenced by the ret check.

// CHECK-LABEL: @test_vraddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}
7260
// vraddhn_high_*: raddhn intrinsic call followed by the same
// shufflevector concatenation onto %r used by the other *_high tests.

// CHECK-LABEL: @test_vraddhn_high_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vraddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}
7324
// vsubhn_*: "subtract, halving narrow" — mirrors vaddhn with sub in
// place of add: wide difference, lshr by half the lane width, trunc.

// CHECK-LABEL: @test_vsubhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vsubhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vsubhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vsubhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vsubhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vsubhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}
7390
// vsubhn_high_*: vsubhn narrowing (sub/lshr/trunc) followed by the
// shufflevector that concatenates the narrow result onto %r.

// CHECK-LABEL: @test_vsubhn_high_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}
7438
7439 // CHECK-LABEL: @test_vsubhn_high_u32(
7440 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7441 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7442 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
7443 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7444 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
7445 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7446 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vsubhn_high_u32(uint16x4_t r,uint32x4_t a,uint32x4_t b)7447 uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
7448 return vsubhn_high_u32(r, a, b);
7449 }
7450
7451 // CHECK-LABEL: @test_vsubhn_high_u64(
7452 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7453 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7454 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
7455 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
7456 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
7457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7458 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vsubhn_high_u64(uint32x2_t r,uint64x2_t a,uint64x2_t b)7459 uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
7460 return vsubhn_high_u64(r, a, b);
7461 }
7462
7463 // CHECK-LABEL: @test_vrsubhn_s16(
7464 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7465 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7466 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7467 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
test_vrsubhn_s16(int16x8_t a,int16x8_t b)7468 int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
7469 return vrsubhn_s16(a, b);
7470 }
7471
7472 // CHECK-LABEL: @test_vrsubhn_s32(
7473 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7474 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7475 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7476 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
7477 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
test_vrsubhn_s32(int32x4_t a,int32x4_t b)7478 int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
7479 return vrsubhn_s32(a, b);
7480 }
7481
7482 // CHECK-LABEL: @test_vrsubhn_s64(
7483 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7484 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7485 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7486 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
7487 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
test_vrsubhn_s64(int64x2_t a,int64x2_t b)7488 int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
7489 return vrsubhn_s64(a, b);
7490 }
7491
7492 // CHECK-LABEL: @test_vrsubhn_u16(
7493 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7494 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7495 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7496 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
test_vrsubhn_u16(uint16x8_t a,uint16x8_t b)7497 uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
7498 return vrsubhn_u16(a, b);
7499 }
7500
7501 // CHECK-LABEL: @test_vrsubhn_u32(
7502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7503 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7504 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7505 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
7506 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
test_vrsubhn_u32(uint32x4_t a,uint32x4_t b)7507 uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
7508 return vrsubhn_u32(a, b);
7509 }
7510
7511 // CHECK-LABEL: @test_vrsubhn_u64(
7512 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7513 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7514 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7515 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
7516 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
test_vrsubhn_u64(uint64x2_t a,uint64x2_t b)7517 uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
7518 return vrsubhn_u64(a, b);
7519 }
7520
7521 // CHECK-LABEL: @test_vrsubhn_high_s16(
7522 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7523 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7524 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7525 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7526 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vrsubhn_high_s16(int8x8_t r,int16x8_t a,int16x8_t b)7527 int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
7528 return vrsubhn_high_s16(r, a, b);
7529 }
7530
7531 // CHECK-LABEL: @test_vrsubhn_high_s32(
7532 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7533 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7534 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7535 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
7536 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7537 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vrsubhn_high_s32(int16x4_t r,int32x4_t a,int32x4_t b)7538 int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
7539 return vrsubhn_high_s32(r, a, b);
7540 }
7541
7542 // CHECK-LABEL: @test_vrsubhn_high_s64(
7543 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7544 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7545 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7546 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
7547 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7548 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vrsubhn_high_s64(int32x2_t r,int64x2_t a,int64x2_t b)7549 int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
7550 return vrsubhn_high_s64(r, a, b);
7551 }
7552
7553 // CHECK-LABEL: @test_vrsubhn_high_u16(
7554 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7555 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7556 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7557 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7558 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vrsubhn_high_u16(uint8x8_t r,uint16x8_t a,uint16x8_t b)7559 uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
7560 return vrsubhn_high_u16(r, a, b);
7561 }
7562
7563 // CHECK-LABEL: @test_vrsubhn_high_u32(
7564 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7565 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7566 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7567 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
7568 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7569 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vrsubhn_high_u32(uint16x4_t r,uint32x4_t a,uint32x4_t b)7570 uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
7571 return vrsubhn_high_u32(r, a, b);
7572 }
7573
7574 // CHECK-LABEL: @test_vrsubhn_high_u64(
7575 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7576 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7577 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7578 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
7579 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7580 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vrsubhn_high_u64(uint32x2_t r,uint64x2_t a,uint64x2_t b)7581 uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
7582 return vrsubhn_high_u64(r, a, b);
7583 }
7584
7585 // CHECK-LABEL: @test_vabdl_s8(
7586 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
7587 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
7588 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
test_vabdl_s8(int8x8_t a,int8x8_t b)7589 int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
7590 return vabdl_s8(a, b);
7591 }
7592
7593 // CHECK-LABEL: @test_vabdl_s16(
7594 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7595 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7596 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
7597 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
7598 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
7599 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
test_vabdl_s16(int16x4_t a,int16x4_t b)7600 int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
7601 return vabdl_s16(a, b);
7602 }
7603
7604 // CHECK-LABEL: @test_vabdl_s32(
7605 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7606 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7607 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
7608 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
7609 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
7610 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
test_vabdl_s32(int32x2_t a,int32x2_t b)7611 int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
7612 return vabdl_s32(a, b);
7613 }
7614
7615 // CHECK-LABEL: @test_vabdl_u8(
7616 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
7617 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
7618 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
test_vabdl_u8(uint8x8_t a,uint8x8_t b)7619 uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
7620 return vabdl_u8(a, b);
7621 }
7622
7623 // CHECK-LABEL: @test_vabdl_u16(
7624 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7625 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7626 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
7627 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
7628 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
7629 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
test_vabdl_u16(uint16x4_t a,uint16x4_t b)7630 uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
7631 return vabdl_u16(a, b);
7632 }
7633
7634 // CHECK-LABEL: @test_vabdl_u32(
7635 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7636 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7637 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
7638 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
7639 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
7640 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
test_vabdl_u32(uint32x2_t a,uint32x2_t b)7641 uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
7642 return vabdl_u32(a, b);
7643 }
7644
7645 // CHECK-LABEL: @test_vabal_s8(
7646 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
7647 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7648 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
7649 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabal_s8(int16x8_t a,int8x8_t b,int8x8_t c)7650 int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
7651 return vabal_s8(a, b, c);
7652 }
7653
7654 // CHECK-LABEL: @test_vabal_s16(
7655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7656 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7657 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
7658 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7659 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7660 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
7661 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabal_s16(int32x4_t a,int16x4_t b,int16x4_t c)7662 int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
7663 return vabal_s16(a, b, c);
7664 }
7665
7666 // CHECK-LABEL: @test_vabal_s32(
7667 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7668 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7669 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
7670 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7671 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7672 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
7673 // CHECK: ret <2 x i64> [[ADD_I]]
test_vabal_s32(int64x2_t a,int32x2_t b,int32x2_t c)7674 int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
7675 return vabal_s32(a, b, c);
7676 }
7677
7678 // CHECK-LABEL: @test_vabal_u8(
7679 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
7680 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7681 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
7682 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabal_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)7683 uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
7684 return vabal_u8(a, b, c);
7685 }
7686
7687 // CHECK-LABEL: @test_vabal_u16(
7688 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7689 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7690 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
7691 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7692 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7693 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
7694 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabal_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)7695 uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
7696 return vabal_u16(a, b, c);
7697 }
7698
7699 // CHECK-LABEL: @test_vabal_u32(
7700 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7701 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7702 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
7703 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7704 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7705 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
7706 // CHECK: ret <2 x i64> [[ADD_I]]
test_vabal_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)7707 uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
7708 return vabal_u32(a, b, c);
7709 }
7710
7711 // CHECK-LABEL: @test_vabdl_high_s8(
7712 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7713 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7714 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7715 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7716 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
test_vabdl_high_s8(int8x16_t a,int8x16_t b)7717 int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
7718 return vabdl_high_s8(a, b);
7719 }
7720
7721 // CHECK-LABEL: @test_vabdl_high_s16(
7722 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7723 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7724 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7725 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7726 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7727 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7728 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7729 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
test_vabdl_high_s16(int16x8_t a,int16x8_t b)7730 int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
7731 return vabdl_high_s16(a, b);
7732 }
7733
7734 // CHECK-LABEL: @test_vabdl_high_s32(
7735 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7736 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7738 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7739 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7740 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7741 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7742 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
test_vabdl_high_s32(int32x4_t a,int32x4_t b)7743 int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
7744 return vabdl_high_s32(a, b);
7745 }
7746
7747 // CHECK-LABEL: @test_vabdl_high_u8(
7748 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7749 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7750 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7751 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7752 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
test_vabdl_high_u8(uint8x16_t a,uint8x16_t b)7753 uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
7754 return vabdl_high_u8(a, b);
7755 }
7756
7757 // CHECK-LABEL: @test_vabdl_high_u16(
7758 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7759 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7760 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7761 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7762 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7763 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7764 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7765 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
test_vabdl_high_u16(uint16x8_t a,uint16x8_t b)7766 uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
7767 return vabdl_high_u16(a, b);
7768 }
7769
7770 // CHECK-LABEL: @test_vabdl_high_u32(
7771 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7772 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7773 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7774 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7775 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7776 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7777 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7778 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
test_vabdl_high_u32(uint32x4_t a,uint32x4_t b)7779 uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
7780 return vabdl_high_u32(a, b);
7781 }
7782
7783 // CHECK-LABEL: @test_vabal_high_s8(
7784 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7785 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7786 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7787 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7788 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7789 // CHECK: ret <8 x i16> [[ADD_I_I]]
test_vabal_high_s8(int16x8_t a,int8x16_t b,int8x16_t c)7790 int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
7791 return vabal_high_s8(a, b, c);
7792 }
7793
7794 // CHECK-LABEL: @test_vabal_high_s16(
7795 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7796 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7797 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7798 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7799 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7800 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7801 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7802 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7803 // CHECK: ret <4 x i32> [[ADD_I_I]]
test_vabal_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)7804 int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
7805 return vabal_high_s16(a, b, c);
7806 }
7807
7808 // CHECK-LABEL: @test_vabal_high_s32(
7809 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7810 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7811 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7812 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7813 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7814 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7815 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7816 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7817 // CHECK: ret <2 x i64> [[ADD_I_I]]
test_vabal_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)7818 int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
7819 return vabal_high_s32(a, b, c);
7820 }
7821
7822 // CHECK-LABEL: @test_vabal_high_u8(
7823 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7824 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7825 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7826 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7827 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7828 // CHECK: ret <8 x i16> [[ADD_I_I]]
test_vabal_high_u8(uint16x8_t a,uint8x16_t b,uint8x16_t c)7829 uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
7830 return vabal_high_u8(a, b, c);
7831 }
7832
7833 // CHECK-LABEL: @test_vabal_high_u16(
7834 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7835 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7836 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7837 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7838 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7839 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7840 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7841 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7842 // CHECK: ret <4 x i32> [[ADD_I_I]]
test_vabal_high_u16(uint32x4_t a,uint16x8_t b,uint16x8_t c)7843 uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
7844 return vabal_high_u16(a, b, c);
7845 }
7846
7847 // CHECK-LABEL: @test_vabal_high_u32(
7848 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7849 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7850 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7851 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7852 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7853 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7854 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7855 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7856 // CHECK: ret <2 x i64> [[ADD_I_I]]
test_vabal_high_u32(uint64x2_t a,uint32x4_t b,uint32x4_t c)7857 uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
7858 return vabal_high_u32(a, b, c);
7859 }
7860
7861 // CHECK-LABEL: @test_vmull_s8(
7862 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
7863 // CHECK: ret <8 x i16> [[VMULL_I]]
test_vmull_s8(int8x8_t a,int8x8_t b)7864 int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
7865 return vmull_s8(a, b);
7866 }
7867
7868 // CHECK-LABEL: @test_vmull_s16(
7869 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7870 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7871 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
7872 // CHECK: ret <4 x i32> [[VMULL2_I]]
test_vmull_s16(int16x4_t a,int16x4_t b)7873 int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
7874 return vmull_s16(a, b);
7875 }
7876
7877 // CHECK-LABEL: @test_vmull_s32(
7878 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7879 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7880 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
7881 // CHECK: ret <2 x i64> [[VMULL2_I]]
test_vmull_s32(int32x2_t a,int32x2_t b)7882 int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
7883 return vmull_s32(a, b);
7884 }
7885
7886 // CHECK-LABEL: @test_vmull_u8(
7887 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
7888 // CHECK: ret <8 x i16> [[VMULL_I]]
test_vmull_u8(uint8x8_t a,uint8x8_t b)7889 uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
7890 return vmull_u8(a, b);
7891 }
7892
7893 // CHECK-LABEL: @test_vmull_u16(
7894 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7895 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7896 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
7897 // CHECK: ret <4 x i32> [[VMULL2_I]]
test_vmull_u16(uint16x4_t a,uint16x4_t b)7898 uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
7899 return vmull_u16(a, b);
7900 }
7901
7902 // CHECK-LABEL: @test_vmull_u32(
7903 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7904 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7905 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
7906 // CHECK: ret <2 x i64> [[VMULL2_I]]
test_vmull_u32(uint32x2_t a,uint32x2_t b)7907 uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
7908 return vmull_u32(a, b);
7909 }
7910
// vmull_high_s8: extracts the high halves via shufflevector, then smull.v8i16.
// CHECK-LABEL: @test_vmull_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
7919
// vmull_high_s16: high halves extracted, then smull.v4i32 on the 4x16 pieces.
// CHECK-LABEL: @test_vmull_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
7930
// vmull_high_s32: high halves extracted, then smull.v2i64 on the 2x32 pieces.
// CHECK-LABEL: @test_vmull_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
7941
// vmull_high_u8: same shape as the signed variant but lowers to umull.v8i16.
// CHECK-LABEL: @test_vmull_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
7950
// vmull_high_u16: high halves extracted, then umull.v4i32.
// CHECK-LABEL: @test_vmull_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
7961
// vmull_high_u32: high halves extracted, then umull.v2i64.
// CHECK-LABEL: @test_vmull_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}
7972
// vmlal_s8: widening multiply-accumulate — smull.v8i16 followed by a plain add.
// CHECK-LABEL: @test_vmlal_s8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
7980
// vmlal_s16: smull.v4i32 on the narrow operands, then add into the accumulator.
// CHECK-LABEL: @test_vmlal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
7990
// vmlal_s32: smull.v2i64 on the narrow operands, then add into the accumulator.
// CHECK-LABEL: @test_vmlal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
8000
// vmlal_u8: unsigned multiply-accumulate — umull.v8i16 plus add.
// CHECK-LABEL: @test_vmlal_u8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
8008
// vmlal_u16: umull.v4i32 plus add into the 32-bit accumulator.
// CHECK-LABEL: @test_vmlal_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
8018
// vmlal_u32: umull.v2i64 plus add into the 64-bit accumulator.
// CHECK-LABEL: @test_vmlal_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
8028
// vmlal_high_s8: high halves of b/c feed smull.v8i16, result added to a.
// CHECK-LABEL: @test_vmlal_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
8038
// vmlal_high_s16: high halves of b/c feed smull.v4i32, result added to a.
// CHECK-LABEL: @test_vmlal_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
8050
// vmlal_high_s32: high halves of b/c feed smull.v2i64, result added to a.
// CHECK-LABEL: @test_vmlal_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
8062
// vmlal_high_u8: high halves of b/c feed umull.v8i16, result added to a.
// CHECK-LABEL: @test_vmlal_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
8072
// vmlal_high_u16: high halves of b/c feed umull.v4i32, result added to a.
// CHECK-LABEL: @test_vmlal_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
8084
// vmlal_high_u32: high halves of b/c feed umull.v2i64, result added to a.
// CHECK-LABEL: @test_vmlal_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}
8096
// vmlsl_s8: widening multiply-subtract — smull.v8i16 then sub from accumulator.
// CHECK-LABEL: @test_vmlsl_s8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
8104
// vmlsl_s16: smull.v4i32 then sub from the 32-bit accumulator.
// CHECK-LABEL: @test_vmlsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
8114
// vmlsl_s32: smull.v2i64 then sub from the 64-bit accumulator.
// CHECK-LABEL: @test_vmlsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
8124
// vmlsl_u8: unsigned multiply-subtract — umull.v8i16 then sub.
// CHECK-LABEL: @test_vmlsl_u8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
8132
// vmlsl_u16: umull.v4i32 then sub from the accumulator.
// CHECK-LABEL: @test_vmlsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
8142
// vmlsl_u32: umull.v2i64 then sub from the accumulator.
// CHECK-LABEL: @test_vmlsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
8152
// vmlsl_high_s8: high halves of b/c feed smull.v8i16, result subtracted from a.
// CHECK-LABEL: @test_vmlsl_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
8162
// vmlsl_high_s16: high halves of b/c feed smull.v4i32, result subtracted from a.
// CHECK-LABEL: @test_vmlsl_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
8174
// vmlsl_high_s32: high halves of b/c feed smull.v2i64, result subtracted from a.
// CHECK-LABEL: @test_vmlsl_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
8186
// vmlsl_high_u8: high halves of b/c feed umull.v8i16, result subtracted from a.
// CHECK-LABEL: @test_vmlsl_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
8196
// vmlsl_high_u16: high halves of b/c feed umull.v4i32, result subtracted from a.
// CHECK-LABEL: @test_vmlsl_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
8208
// vmlsl_high_u32: high halves of b/c feed umull.v2i64, result subtracted from a.
// CHECK-LABEL: @test_vmlsl_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}
8220
// vqdmull_s16: saturating doubling widening multiply via sqdmull.v4i32.
// CHECK-LABEL: @test_vqdmull_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
8230
// vqdmull_s32: saturating doubling widening multiply via sqdmull.v2i64.
// CHECK-LABEL: @test_vqdmull_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}
8240
// vqdmlal_s16: sqdmull.v4i32 product saturating-added to a via sqadd.v4i32.
// CHECK-LABEL: @test_vqdmlal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}
8251
// vqdmlal_s32: sqdmull.v2i64 product saturating-added to a via sqadd.v2i64.
// CHECK-LABEL: @test_vqdmlal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}
8262
// vqdmlsl_s16: sqdmull.v4i32 product saturating-subtracted via sqsub.v4i32.
// CHECK-LABEL: @test_vqdmlsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}
8273
// vqdmlsl_s32: sqdmull.v2i64 product saturating-subtracted via sqsub.v2i64.
// CHECK-LABEL: @test_vqdmlsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}
8284
// vqdmull_high_s16: high halves extracted, then sqdmull.v4i32.
// CHECK-LABEL: @test_vqdmull_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V2_I_I]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
8296
// vqdmull_high_s32: high halves extracted, then sqdmull.v2i64.
// CHECK-LABEL: @test_vqdmull_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V2_I_I]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}
8308
// vqdmlal_high_s16: high halves of b/c feed sqdmull.v4i32, then sqadd into a.
// CHECK-LABEL: @test_vqdmlal_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}
8321
// vqdmlal_high_s32: high halves of b/c feed sqdmull.v2i64, then sqadd into a.
// CHECK-LABEL: @test_vqdmlal_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}
8334
// vqdmlsl_high_s16: high halves of b/c feed sqdmull.v4i32, then sqsub from a.
// CHECK-LABEL: @test_vqdmlsl_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}
8347
// vqdmlsl_high_s32: high halves of b/c feed sqdmull.v2i64, then sqsub from a.
// CHECK-LABEL: @test_vqdmlsl_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}
8360
// vmull_p8: polynomial (carry-less) widening multiply via pmull.v8i16.
// CHECK-LABEL: @test_vmull_p8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}
8367
// vmull_high_p8: high halves extracted, then polynomial pmull.v8i16.
// CHECK-LABEL: @test_vmull_high_p8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}
8376
// vaddd_s64: scalar add folds to a plain i64 add instruction.
// CHECK-LABEL: @test_vaddd_s64(
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}
8383
// vaddd_u64: identical lowering to the signed variant (add is sign-agnostic).
// CHECK-LABEL: @test_vaddd_u64(
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}
8390
// vsubd_s64: scalar subtract folds to a plain i64 sub instruction.
// CHECK-LABEL: @test_vsubd_s64(
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}
8397
// vsubd_u64: identical lowering to the signed variant (sub is sign-agnostic).
// CHECK-LABEL: @test_vsubd_u64(
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}
8404
// vqaddb_s8: scalar saturating add — operands inserted into lane 0 of an <8 x i8>
// vector, sqadd.v8i8 applied, lane 0 extracted back out.
// CHECK-LABEL: @test_vqaddb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}
8414
// vqaddh_s16: scalar saturating add through lane 0 of a <4 x i16> sqadd.
// CHECK-LABEL: @test_vqaddh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}
8424
8425 // CHECK-LABEL: @test_vqadds_s32(
8426 // CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
8427 // CHECK: ret i32 [[VQADDS_S32_I]]
test_vqadds_s32(int32_t a,int32_t b)8428 int32_t test_vqadds_s32(int32_t a, int32_t b) {
8429 return vqadds_s32(a, b);
8430 }
8431
8432 // CHECK-LABEL: @test_vqaddd_s64(
8433 // CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
8434 // CHECK: ret i64 [[VQADDD_S64_I]]
test_vqaddd_s64(int64_t a,int64_t b)8435 int64_t test_vqaddd_s64(int64_t a, int64_t b) {
8436 return vqaddd_s64(a, b);
8437 }
8438
8439 // CHECK-LABEL: @test_vqaddb_u8(
8440 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8441 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8442 // CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8443 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
8444 // CHECK: ret i8 [[TMP2]]
test_vqaddb_u8(uint8_t a,uint8_t b)8445 uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
8446 return vqaddb_u8(a, b);
8447 }
8448
8449 // CHECK-LABEL: @test_vqaddh_u16(
8450 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8451 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8452 // CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8453 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
8454 // CHECK: ret i16 [[TMP2]]
test_vqaddh_u16(uint16_t a,uint16_t b)8455 uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
8456 return vqaddh_u16(a, b);
8457 }
8458
8459 // CHECK-LABEL: @test_vqadds_u32(
8460 // CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
8461 // CHECK: ret i32 [[VQADDS_U32_I]]
test_vqadds_u32(uint32_t a,uint32_t b)8462 uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
8463 return vqadds_u32(a, b);
8464 }
8465
8466 // CHECK-LABEL: @test_vqaddd_u64(
8467 // CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
8468 // CHECK: ret i64 [[VQADDD_U64_I]]
test_vqaddd_u64(uint64_t a,uint64_t b)8469 uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
8470 return vqaddd_u64(a, b);
8471 }
8472
// Scalar saturating subtract intrinsics; same lane-0 widening scheme as the
// vqadd tests above for the sub-i32 element sizes.
// CHECK-LABEL: @test_vqsubb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: @test_vqsubh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: @test_vqsubs_s32(
// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: @test_vqsubd_s64(
// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: @test_vqsubb_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: @test_vqsubh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: @test_vqsubs_u32(
// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: @test_vqsubd_u64(
// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}
8540
// Scalar 64-bit shift-by-register intrinsics (signed/unsigned).
// CHECK-LABEL: @test_vshld_s64(
// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: @test_vshld_u64(
// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}
8554
// Scalar saturating shift intrinsics; 8/16-bit elements go through the
// lane-0 vector widening pattern, 32/64-bit use the scalar intrinsic.
// CHECK-LABEL: @test_vqshlb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqshlh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqshls_s32(
// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: @test_vqshld_s64(
// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: @test_vqshlb_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqshlh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqshls_u32(
// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: @test_vqshld_u64(
// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}
8622
// Scalar 64-bit rounding shift intrinsics.
// CHECK-LABEL: @test_vrshld_s64(
// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}

// CHECK-LABEL: @test_vrshld_u64(
// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}
8636
// Scalar saturating rounding shift intrinsics; same widening scheme as the
// vqshl tests for 8/16-bit element sizes.
// CHECK-LABEL: @test_vqrshlb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqrshls_s32(
// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: @test_vqrshld_s64(
// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqrshls_u32(
// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: @test_vqrshld_u64(
// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}
8704
// Scalar pairwise add intrinsics: the i64 variant lowers to an across-vector
// uaddv reduction, the fp variants to extract-both-lanes + fadd.
// CHECK-LABEL: @test_vpaddd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: @test_vpadds_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK: ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: @test_vpaddd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK: ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}
8732
// Scalar pairwise floating-point max/min (and the NaN-propagating *nm
// variants); each lowers to the corresponding across-vector reduction call.
// CHECK-LABEL: @test_vpmaxnms_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: @test_vpmaxnmqd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmaxs_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: @test_vpmaxqd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: @test_vpminnms_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: @test_vpminnmqd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmins_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: @test_vpminqd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}
8796
// Scalar saturating doubling multiply-high (plain and rounding variants);
// the i16 versions widen through lane 0 of a <4 x i16>.
// CHECK-LABEL: @test_vqdmulhh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhs_s32(
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhs_s32(
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}
8830
// Floating-point multiply-extended (fmulx) intrinsics: scalar f32/f64 and
// the 1-element f64 vector form.
// CHECK-LABEL: @test_vmulxs_f32(
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: @test_vmulxd_f64(
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: @test_vmulx_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}
8853
// Newton-Raphson step intrinsics: reciprocal (frecps) and reciprocal
// square-root (frsqrts) steps for scalar f32/f64.
// CHECK-LABEL: @test_vrecpss_f32(
// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
// CHECK: ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsd_f64(
// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
// CHECK: ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrtss_f32(
// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
// CHECK: ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsd_f64(
// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
// CHECK: ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}
8881
// Scalar integer-to-float converts: these lower to plain sitofp/uitofp IR
// instructions rather than target intrinsic calls.
// CHECK-LABEL: @test_vcvts_f32_s32(
// CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_s64(
// CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: @test_vcvts_f32_u32(
// CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_u64(
// CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}
8909
// Scalar reciprocal estimate (frecpe) and reciprocal exponent (frecpx)
// intrinsics for f32/f64.
// CHECK-LABEL: @test_vrecpes_f32(
// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
// CHECK: ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: @test_vrecped_f64(
// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
// CHECK: ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: @test_vrecpxs_f32(
// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
// CHECK: ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: @test_vrecpxd_f64(
// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
// CHECK: ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}
8937
// Reciprocal square-root estimate intrinsics: unsigned-integer vector forms
// (ursqrte) and scalar floating-point forms (frsqrte).
// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: @test_vrsqrtes_f32(
// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
// CHECK: ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: @test_vrsqrted_f64(
// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
// CHECK: ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}
8967
// 128-bit vld1q loads for every element type: each lowers to a pointer
// bitcast followed by a plain vector load (typed-pointer IR; the CHECK lines
// encode the exact bitcast chain clang emits).
// CHECK-LABEL: @test_vld1q_u8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: @test_vld1q_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: @test_vld1q_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: @test_vld1q_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: @test_vld1q_s8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: @test_vld1q_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: @test_vld1q_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: @test_vld1q_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: @test_vld1q_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK: [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]]
// CHECK: ret <8 x half> [[TMP2]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: @test_vld1q_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: @test_vld1q_f64(
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: @test_vld1q_p8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: @test_vld1q_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}
9081
9082 // CHECK-LABEL: @test_vld1_u8(
9083 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9084 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9085 // CHECK: ret <8 x i8> [[TMP1]]
test_vld1_u8(uint8_t const * a)9086 uint8x8_t test_vld1_u8(uint8_t const *a) {
9087 return vld1_u8(a);
9088 }
9089
9090 // CHECK-LABEL: @test_vld1_u16(
9091 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9092 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9093 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9094 // CHECK: ret <4 x i16> [[TMP2]]
test_vld1_u16(uint16_t const * a)9095 uint16x4_t test_vld1_u16(uint16_t const *a) {
9096 return vld1_u16(a);
9097 }
9098
9099 // CHECK-LABEL: @test_vld1_u32(
9100 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
9101 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
9102 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
9103 // CHECK: ret <2 x i32> [[TMP2]]
test_vld1_u32(uint32_t const * a)9104 uint32x2_t test_vld1_u32(uint32_t const *a) {
9105 return vld1_u32(a);
9106 }
9107
9108 // CHECK-LABEL: @test_vld1_u64(
9109 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
9110 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
9111 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
9112 // CHECK: ret <1 x i64> [[TMP2]]
// Expect a single <1 x i64> load per the FileCheck patterns above.
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}
9116
9117 // CHECK-LABEL: @test_vld1_s8(
9118 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9119 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9120 // CHECK: ret <8 x i8> [[TMP1]]
// Expect a single <8 x i8> load per the FileCheck patterns above.
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}
9124
9125 // CHECK-LABEL: @test_vld1_s16(
9126 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9127 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9128 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9129 // CHECK: ret <4 x i16> [[TMP2]]
// Expect a single <4 x i16> load per the FileCheck patterns above.
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}
9133
9134 // CHECK-LABEL: @test_vld1_s32(
9135 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
9136 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
9137 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
9138 // CHECK: ret <2 x i32> [[TMP2]]
// Expect a single <2 x i32> load per the FileCheck patterns above.
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}
9142
9143 // CHECK-LABEL: @test_vld1_s64(
9144 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
9145 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
9146 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
9147 // CHECK: ret <1 x i64> [[TMP2]]
// Expect a single <1 x i64> load per the FileCheck patterns above.
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}
9151
9152 // CHECK-LABEL: @test_vld1_f16(
9153 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
9154 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
9155 // CHECK: [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]]
9156 // CHECK: ret <4 x half> [[TMP2]]
// Expect a single <4 x half> load per the FileCheck patterns above.
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}
9160
9161 // CHECK-LABEL: @test_vld1_f32(
9162 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
9163 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
9164 // CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
9165 // CHECK: ret <2 x float> [[TMP2]]
// Expect a single <2 x float> load per the FileCheck patterns above.
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}
9169
9170 // CHECK-LABEL: @test_vld1_f64(
9171 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
9172 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
9173 // CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
9174 // CHECK: ret <1 x double> [[TMP2]]
// Expect a single <1 x double> load per the FileCheck patterns above.
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}
9178
9179 // CHECK-LABEL: @test_vld1_p8(
9180 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9181 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9182 // CHECK: ret <8 x i8> [[TMP1]]
// Expect a single <8 x i8> load per the FileCheck patterns above.
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}
9186
9187 // CHECK-LABEL: @test_vld1_p16(
9188 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9189 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9190 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9191 // CHECK: ret <4 x i16> [[TMP2]]
// Expect a single <4 x i16> load per the FileCheck patterns above.
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}
9195
9196 // CHECK-LABEL: @test_vld2q_u8(
9197 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
9198 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
9199 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9200 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9201 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9202 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9203 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9204 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
9205 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9206 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9207 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
9208 // CHECK: ret %struct.uint8x16x2_t [[TMP5]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <16 x i8> vectors,
// returned via the sret-style struct copy matched above.
uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
  return vld2q_u8(a);
}
9212
9213 // CHECK-LABEL: @test_vld2q_u16(
9214 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
9215 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
9216 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9217 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9218 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9219 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9220 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9221 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9222 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
9223 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9224 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9225 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
9226 // CHECK: ret %struct.uint16x8x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <8 x i16> vectors,
// returned via the struct copy matched above.
uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
  return vld2q_u16(a);
}
9230
9231 // CHECK-LABEL: @test_vld2q_u32(
9232 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
9233 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
9234 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9235 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9236 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9237 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9238 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9239 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9240 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
9241 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9242 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9243 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
9244 // CHECK: ret %struct.uint32x4x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <4 x i32> vectors,
// returned via the struct copy matched above.
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
  return vld2q_u32(a);
}
9248
9249 // CHECK-LABEL: @test_vld2q_u64(
9250 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
9251 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
9252 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9253 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9254 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9255 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9256 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9257 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9258 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
9259 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9260 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9261 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
9262 // CHECK: ret %struct.uint64x2x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <2 x i64> vectors,
// returned via the struct copy matched above.
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
  return vld2q_u64(a);
}
9266
9267 // CHECK-LABEL: @test_vld2q_s8(
9268 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
9269 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
9270 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9271 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9272 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9273 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9274 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9275 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
9276 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9277 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9278 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
9279 // CHECK: ret %struct.int8x16x2_t [[TMP5]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <16 x i8> vectors,
// returned via the struct copy matched above.
int8x16x2_t test_vld2q_s8(int8_t const *a) {
  return vld2q_s8(a);
}
9283
9284 // CHECK-LABEL: @test_vld2q_s16(
9285 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
9286 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
9287 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9288 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9289 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9290 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9291 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9292 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9293 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
9294 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9295 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9296 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
9297 // CHECK: ret %struct.int16x8x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <8 x i16> vectors,
// returned via the struct copy matched above.
int16x8x2_t test_vld2q_s16(int16_t const *a) {
  return vld2q_s16(a);
}
9301
9302 // CHECK-LABEL: @test_vld2q_s32(
9303 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
9304 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
9305 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9306 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9307 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9308 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9309 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9310 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9311 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
9312 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9313 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9314 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
9315 // CHECK: ret %struct.int32x4x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <4 x i32> vectors,
// returned via the struct copy matched above.
int32x4x2_t test_vld2q_s32(int32_t const *a) {
  return vld2q_s32(a);
}
9319
9320 // CHECK-LABEL: @test_vld2q_s64(
9321 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
9322 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
9323 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9324 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9325 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9326 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9327 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9328 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9329 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
9330 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9331 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9332 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
9333 // CHECK: ret %struct.int64x2x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <2 x i64> vectors,
// returned via the struct copy matched above.
int64x2x2_t test_vld2q_s64(int64_t const *a) {
  return vld2q_s64(a);
}
9337
9338 // CHECK-LABEL: @test_vld2q_f16(
9339 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
9340 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
9341 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9342 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
9343 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
9344 // CHECK: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* [[TMP2]])
9345 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
9346 // CHECK: store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
9347 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
9348 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9349 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9350 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
9351 // CHECK: ret %struct.float16x8x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <8 x half> vectors,
// returned via the struct copy matched above.
float16x8x2_t test_vld2q_f16(float16_t const *a) {
  return vld2q_f16(a);
}
9355
9356 // CHECK-LABEL: @test_vld2q_f32(
9357 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
9358 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
9359 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9360 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
9361 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
9362 // CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
9363 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
9364 // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
9365 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
9366 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9367 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9368 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
9369 // CHECK: ret %struct.float32x4x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <4 x float> vectors,
// returned via the struct copy matched above.
float32x4x2_t test_vld2q_f32(float32_t const *a) {
  return vld2q_f32(a);
}
9373
9374 // CHECK-LABEL: @test_vld2q_f64(
9375 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
9376 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
9377 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9378 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
9379 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
9380 // CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
9381 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
9382 // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
9383 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
9384 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9385 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9386 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
9387 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <2 x double> vectors,
// returned via the struct copy matched above.
float64x2x2_t test_vld2q_f64(float64_t const *a) {
  return vld2q_f64(a);
}
9391
9392 // CHECK-LABEL: @test_vld2q_p8(
9393 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
9394 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
9395 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9396 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9397 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9398 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9399 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9400 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
9401 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9402 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9403 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
9404 // CHECK: ret %struct.poly8x16x2_t [[TMP5]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <16 x i8> vectors,
// returned via the struct copy matched above.
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
  return vld2q_p8(a);
}
9408
9409 // CHECK-LABEL: @test_vld2q_p16(
9410 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
9411 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
9412 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9413 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9414 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9415 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9416 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9417 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9418 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
9419 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9420 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9421 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
9422 // CHECK: ret %struct.poly16x8x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <8 x i16> vectors,
// returned via the struct copy matched above.
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
  return vld2q_p16(a);
}
9426
9427 // CHECK-LABEL: @test_vld2_u8(
9428 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
9429 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
9430 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
9431 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9432 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9433 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9434 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9435 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
9436 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
9437 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9438 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
9439 // CHECK: ret %struct.uint8x8x2_t [[TMP5]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <8 x i8> vectors,
// returned via the struct copy matched above.
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
  return vld2_u8(a);
}
9443
9444 // CHECK-LABEL: @test_vld2_u16(
9445 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
9446 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
9447 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
9448 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9449 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9450 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9451 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9452 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9453 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
9454 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
9455 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9456 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
9457 // CHECK: ret %struct.uint16x4x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <4 x i16> vectors,
// returned via the struct copy matched above.
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
  return vld2_u16(a);
}
9461
9462 // CHECK-LABEL: @test_vld2_u32(
9463 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
9464 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
9465 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
9466 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9467 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
9468 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
9469 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
9470 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
9471 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
9472 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
9473 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9474 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
9475 // CHECK: ret %struct.uint32x2x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <2 x i32> vectors,
// returned via the struct copy matched above.
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
  return vld2_u32(a);
}
9479
9480 // CHECK-LABEL: @test_vld2_u64(
9481 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
9482 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
9483 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
9484 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9485 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
9486 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
9487 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
9488 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
9489 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
9490 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
9491 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9492 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
9493 // CHECK: ret %struct.uint64x1x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <1 x i64> vectors,
// returned via the struct copy matched above.
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
  return vld2_u64(a);
}
9497
9498 // CHECK-LABEL: @test_vld2_s8(
9499 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
9500 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
9501 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
9502 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9503 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9504 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9505 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9506 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
9507 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
9508 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9509 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
9510 // CHECK: ret %struct.int8x8x2_t [[TMP5]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <8 x i8> vectors,
// returned via the struct copy matched above.
int8x8x2_t test_vld2_s8(int8_t const *a) {
  return vld2_s8(a);
}
9514
9515 // CHECK-LABEL: @test_vld2_s16(
9516 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
9517 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
9518 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
9519 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9520 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9521 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9522 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9523 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9524 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
9525 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
9526 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9527 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
9528 // CHECK: ret %struct.int16x4x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <4 x i16> vectors,
// returned via the struct copy matched above.
int16x4x2_t test_vld2_s16(int16_t const *a) {
  return vld2_s16(a);
}
9532
9533 // CHECK-LABEL: @test_vld2_s32(
9534 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
9535 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
9536 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
9537 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9538 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
9539 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
9540 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
9541 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
9542 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
9543 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
9544 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9545 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
9546 // CHECK: ret %struct.int32x2x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <2 x i32> vectors,
// returned via the struct copy matched above.
int32x2x2_t test_vld2_s32(int32_t const *a) {
  return vld2_s32(a);
}
9550
9551 // CHECK-LABEL: @test_vld2_s64(
9552 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
9553 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
9554 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
9555 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9556 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
9557 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
9558 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
9559 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
9560 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
9561 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
9562 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9563 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
9564 // CHECK: ret %struct.int64x1x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <1 x i64> vectors,
// returned via the struct copy matched above.
int64x1x2_t test_vld2_s64(int64_t const *a) {
  return vld2_s64(a);
}
9568
9569 // CHECK-LABEL: @test_vld2_f16(
9570 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
9571 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
9572 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
9573 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
9574 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
9575 // CHECK: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* [[TMP2]])
9576 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
9577 // CHECK: store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
9578 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
9579 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
9580 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9581 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
9582 // CHECK: ret %struct.float16x4x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <4 x half> vectors,
// returned via the struct copy matched above.
float16x4x2_t test_vld2_f16(float16_t const *a) {
  return vld2_f16(a);
}
9586
9587 // CHECK-LABEL: @test_vld2_f32(
9588 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
9589 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
9590 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
9591 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
9592 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
9593 // CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
9594 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
9595 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
9596 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
9597 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
9598 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9599 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
9600 // CHECK: ret %struct.float32x2x2_t [[TMP6]]
// Expect an @llvm.aarch64.neon.ld2 call yielding two <2 x float> vectors,
// returned via the struct copy matched above.
float32x2x2_t test_vld2_f32(float32_t const *a) {
  return vld2_f32(a);
}
9604
9605 // CHECK-LABEL: @test_vld2_f64(
9606 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
9607 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
9608 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
9609 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
9610 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
9611 // CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
9612 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
9613 // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
9614 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
9615 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
9616 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9617 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
9618 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
// Verifies that vld2_f64 lowers to the @llvm.aarch64.neon.ld2.v1f64 intrinsic
// pinned by the CHECK lines above.
test_vld2_f64(float64_t const * a)9619 float64x1x2_t test_vld2_f64(float64_t const *a) {
9620   return vld2_f64(a);
9621 }
9622
9623 // CHECK-LABEL: @test_vld2_p8(
9624 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
9625 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
9626 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
9627 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9628 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9629 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9630 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9631 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
9632 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
9633 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9634 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
9635 // CHECK: ret %struct.poly8x8x2_t [[TMP5]]
// Verifies that vld2_p8 lowers to the @llvm.aarch64.neon.ld2.v8i8 intrinsic
// pinned by the CHECK lines above.
test_vld2_p8(poly8_t const * a)9636 poly8x8x2_t test_vld2_p8(poly8_t const *a) {
9637   return vld2_p8(a);
9638 }
9639
9640 // CHECK-LABEL: @test_vld2_p16(
9641 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
9642 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
9643 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
9644 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9645 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9646 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9647 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9648 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9649 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
9650 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
9651 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9652 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
9653 // CHECK: ret %struct.poly16x4x2_t [[TMP6]]
// Verifies that vld2_p16 lowers to the @llvm.aarch64.neon.ld2.v4i16 intrinsic
// pinned by the CHECK lines above.
test_vld2_p16(poly16_t const * a)9654 poly16x4x2_t test_vld2_p16(poly16_t const *a) {
9655   return vld2_p16(a);
9656 }
9657
9658 // CHECK-LABEL: @test_vld3q_u8(
9659 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
9660 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
9661 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
9662 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9663 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9664 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
9665 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
9666 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
9667 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
9668 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
9669 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
9670 // CHECK: ret %struct.uint8x16x3_t [[TMP5]]
// Verifies that vld3q_u8 lowers to the @llvm.aarch64.neon.ld3.v16i8 intrinsic
// pinned by the CHECK lines above.
test_vld3q_u8(uint8_t const * a)9671 uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
9672   return vld3q_u8(a);
9673 }
9674
9675 // CHECK-LABEL: @test_vld3q_u16(
9676 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
9677 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
9678 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
9679 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9680 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9681 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9682 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
9683 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
9684 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
9685 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
9686 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9687 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
9688 // CHECK: ret %struct.uint16x8x3_t [[TMP6]]
// Verifies that vld3q_u16 lowers to the @llvm.aarch64.neon.ld3.v8i16 intrinsic
// pinned by the CHECK lines above.
test_vld3q_u16(uint16_t const * a)9689 uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
9690   return vld3q_u16(a);
9691 }
9692
9693 // CHECK-LABEL: @test_vld3q_u32(
9694 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
9695 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
9696 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
9697 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9698 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9699 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9700 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
9701 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
9702 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
9703 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
9704 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9705 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
9706 // CHECK: ret %struct.uint32x4x3_t [[TMP6]]
// Verifies that vld3q_u32 lowers to the @llvm.aarch64.neon.ld3.v4i32 intrinsic
// pinned by the CHECK lines above.
test_vld3q_u32(uint32_t const * a)9707 uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
9708   return vld3q_u32(a);
9709 }
9710
9711 // CHECK-LABEL: @test_vld3q_u64(
9712 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
9713 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
9714 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
9715 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9716 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9717 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9718 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
9719 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
9720 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
9721 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
9722 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9723 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
9724 // CHECK: ret %struct.uint64x2x3_t [[TMP6]]
// Verifies that vld3q_u64 lowers to the @llvm.aarch64.neon.ld3.v2i64 intrinsic
// pinned by the CHECK lines above.
test_vld3q_u64(uint64_t const * a)9725 uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
9726   return vld3q_u64(a);
9727 }
9728
9729 // CHECK-LABEL: @test_vld3q_s8(
9730 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
9731 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
9732 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
9733 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9734 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9735 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
9736 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
9737 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
9738 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
9739 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
9740 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
9741 // CHECK: ret %struct.int8x16x3_t [[TMP5]]
// Verifies that vld3q_s8 lowers to the @llvm.aarch64.neon.ld3.v16i8 intrinsic
// pinned by the CHECK lines above.
test_vld3q_s8(int8_t const * a)9742 int8x16x3_t test_vld3q_s8(int8_t const *a) {
9743   return vld3q_s8(a);
9744 }
9745
9746 // CHECK-LABEL: @test_vld3q_s16(
9747 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
9748 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
9749 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
9750 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9751 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9752 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9753 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
9754 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
9755 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
9756 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
9757 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9758 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
9759 // CHECK: ret %struct.int16x8x3_t [[TMP6]]
// Verifies that vld3q_s16 lowers to the @llvm.aarch64.neon.ld3.v8i16 intrinsic
// pinned by the CHECK lines above.
test_vld3q_s16(int16_t const * a)9760 int16x8x3_t test_vld3q_s16(int16_t const *a) {
9761   return vld3q_s16(a);
9762 }
9763
9764 // CHECK-LABEL: @test_vld3q_s32(
9765 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
9766 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
9767 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
9768 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9769 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9770 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9771 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
9772 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
9773 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
9774 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
9775 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9776 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
9777 // CHECK: ret %struct.int32x4x3_t [[TMP6]]
// Verifies that vld3q_s32 lowers to the @llvm.aarch64.neon.ld3.v4i32 intrinsic
// pinned by the CHECK lines above.
test_vld3q_s32(int32_t const * a)9778 int32x4x3_t test_vld3q_s32(int32_t const *a) {
9779   return vld3q_s32(a);
9780 }
9781
9782 // CHECK-LABEL: @test_vld3q_s64(
9783 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
9784 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
9785 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
9786 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9787 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9788 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9789 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
9790 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
9791 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
9792 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
9793 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9794 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
9795 // CHECK: ret %struct.int64x2x3_t [[TMP6]]
// Verifies that vld3q_s64 lowers to the @llvm.aarch64.neon.ld3.v2i64 intrinsic
// pinned by the CHECK lines above.
test_vld3q_s64(int64_t const * a)9796 int64x2x3_t test_vld3q_s64(int64_t const *a) {
9797   return vld3q_s64(a);
9798 }
9799
9800 // CHECK-LABEL: @test_vld3q_f16(
9801 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
9802 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
9803 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
9804 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
9805 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
9806 // CHECK: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* [[TMP2]])
9807 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
9808 // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
9809 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
9810 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
9811 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9812 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
9813 // CHECK: ret %struct.float16x8x3_t [[TMP6]]
// Verifies that vld3q_f16 lowers to the @llvm.aarch64.neon.ld3.v8f16 intrinsic
// pinned by the CHECK lines above.
test_vld3q_f16(float16_t const * a)9814 float16x8x3_t test_vld3q_f16(float16_t const *a) {
9815   return vld3q_f16(a);
9816 }
9817
9818 // CHECK-LABEL: @test_vld3q_f32(
9819 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
9820 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
9821 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
9822 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
9823 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
9824 // CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
9825 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
9826 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
9827 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
9828 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
9829 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9830 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
9831 // CHECK: ret %struct.float32x4x3_t [[TMP6]]
// Verifies that vld3q_f32 lowers to the @llvm.aarch64.neon.ld3.v4f32 intrinsic
// pinned by the CHECK lines above.
test_vld3q_f32(float32_t const * a)9832 float32x4x3_t test_vld3q_f32(float32_t const *a) {
9833   return vld3q_f32(a);
9834 }
9835
9836 // CHECK-LABEL: @test_vld3q_f64(
9837 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
9838 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
9839 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
9840 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
9841 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
9842 // CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
9843 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
9844 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
9845 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
9846 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
9847 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9848 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
9849 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
// Verifies that vld3q_f64 lowers to the @llvm.aarch64.neon.ld3.v2f64 intrinsic
// pinned by the CHECK lines above.
test_vld3q_f64(float64_t const * a)9850 float64x2x3_t test_vld3q_f64(float64_t const *a) {
9851   return vld3q_f64(a);
9852 }
9853
9854 // CHECK-LABEL: @test_vld3q_p8(
9855 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
9856 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
9857 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
9858 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9859 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9860 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
9861 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
9862 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
9863 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
9864 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
9865 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
9866 // CHECK: ret %struct.poly8x16x3_t [[TMP5]]
// Verifies that vld3q_p8 lowers to the @llvm.aarch64.neon.ld3.v16i8 intrinsic
// pinned by the CHECK lines above.
test_vld3q_p8(poly8_t const * a)9867 poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
9868   return vld3q_p8(a);
9869 }
9870
9871 // CHECK-LABEL: @test_vld3q_p16(
9872 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
9873 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
9874 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
9875 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9876 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9877 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9878 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
9879 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
9880 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
9881 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
9882 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9883 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
9884 // CHECK: ret %struct.poly16x8x3_t [[TMP6]]
// Verifies that vld3q_p16 lowers to the @llvm.aarch64.neon.ld3.v8i16 intrinsic
// pinned by the CHECK lines above.
test_vld3q_p16(poly16_t const * a)9885 poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
9886   return vld3q_p16(a);
9887 }
9888
9889 // CHECK-LABEL: @test_vld3_u8(
9890 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
9891 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
9892 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
9893 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9894 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9895 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
9896 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
9897 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
9898 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
9899 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
9900 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
9901 // CHECK: ret %struct.uint8x8x3_t [[TMP5]]
// Verifies that vld3_u8 lowers to the @llvm.aarch64.neon.ld3.v8i8 intrinsic
// pinned by the CHECK lines above.
test_vld3_u8(uint8_t const * a)9902 uint8x8x3_t test_vld3_u8(uint8_t const *a) {
9903   return vld3_u8(a);
9904 }
9905
9906 // CHECK-LABEL: @test_vld3_u16(
9907 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
9908 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
9909 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
9910 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9911 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9912 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9913 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
9914 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
9915 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
9916 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
9917 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
9918 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
9919 // CHECK: ret %struct.uint16x4x3_t [[TMP6]]
// Verifies that vld3_u16 lowers to the @llvm.aarch64.neon.ld3.v4i16 intrinsic
// pinned by the CHECK lines above.
test_vld3_u16(uint16_t const * a)9920 uint16x4x3_t test_vld3_u16(uint16_t const *a) {
9921   return vld3_u16(a);
9922 }
9923
9924 // CHECK-LABEL: @test_vld3_u32(
9925 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
9926 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
9927 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
9928 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9929 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
9930 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
9931 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
9932 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
9933 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
9934 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
9935 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
9936 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
9937 // CHECK: ret %struct.uint32x2x3_t [[TMP6]]
// Verifies that vld3_u32 lowers to the @llvm.aarch64.neon.ld3.v2i32 intrinsic
// pinned by the CHECK lines above.
test_vld3_u32(uint32_t const * a)9938 uint32x2x3_t test_vld3_u32(uint32_t const *a) {
9939   return vld3_u32(a);
9940 }
9941
9942 // CHECK-LABEL: @test_vld3_u64(
9943 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
9944 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
9945 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
9946 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9947 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
9948 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
9949 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
9950 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
9951 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
9952 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
9953 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
9954 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
9955 // CHECK: ret %struct.uint64x1x3_t [[TMP6]]
// Verifies that vld3_u64 lowers to the @llvm.aarch64.neon.ld3.v1i64 intrinsic
// pinned by the CHECK lines above.
test_vld3_u64(uint64_t const * a)9956 uint64x1x3_t test_vld3_u64(uint64_t const *a) {
9957   return vld3_u64(a);
9958 }
9959
9960 // CHECK-LABEL: @test_vld3_s8(
9961 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
9962 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
9963 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
9964 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9965 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9966 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
9967 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
9968 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
9969 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
9970 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
9971 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
9972 // CHECK: ret %struct.int8x8x3_t [[TMP5]]
// Verifies that vld3_s8 lowers to the @llvm.aarch64.neon.ld3.v8i8 intrinsic
// pinned by the CHECK lines above.
test_vld3_s8(int8_t const * a)9973 int8x8x3_t test_vld3_s8(int8_t const *a) {
9974   return vld3_s8(a);
9975 }
9976
9977 // CHECK-LABEL: @test_vld3_s16(
9978 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
9979 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
9980 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
9981 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9982 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9983 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9984 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
9985 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
9986 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
9987 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
9988 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
9989 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
9990 // CHECK: ret %struct.int16x4x3_t [[TMP6]]
// Verifies that vld3_s16 lowers to the @llvm.aarch64.neon.ld3.v4i16 intrinsic
// pinned by the CHECK lines above.
test_vld3_s16(int16_t const * a)9991 int16x4x3_t test_vld3_s16(int16_t const *a) {
9992   return vld3_s16(a);
9993 }
9994
9995 // CHECK-LABEL: @test_vld3_s32(
9996 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
9997 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
9998 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
9999 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10000 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10001 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10002 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
10003 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10004 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
10005 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
10006 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
10007 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
10008 // CHECK: ret %struct.int32x2x3_t [[TMP6]]
// Verifies that vld3_s32 lowers to the @llvm.aarch64.neon.ld3.v2i32 intrinsic
// pinned by the CHECK lines above.
test_vld3_s32(int32_t const * a)10009 int32x2x3_t test_vld3_s32(int32_t const *a) {
10010   return vld3_s32(a);
10011 }
10012
10013 // CHECK-LABEL: @test_vld3_s64(
10014 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
10015 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
10016 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
10017 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10018 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10019 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10020 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
10021 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10022 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
10023 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
10024 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
10025 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
10026 // CHECK: ret %struct.int64x1x3_t [[TMP6]]
// Verifies that vld3_s64 lowers to the @llvm.aarch64.neon.ld3.v1i64 intrinsic
// pinned by the CHECK lines above.
test_vld3_s64(int64_t const * a)10027 int64x1x3_t test_vld3_s64(int64_t const *a) {
10028   return vld3_s64(a);
10029 }
10030
10031 // CHECK-LABEL: @test_vld3_f16(
10032 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
10033 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
10034 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
10035 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10036 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
10037 // CHECK: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* [[TMP2]])
10038 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
10039 // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
10040 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
10041 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
10042 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
10043 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
10044 // CHECK: ret %struct.float16x4x3_t [[TMP6]]
// Verifies that vld3_f16 lowers to the @llvm.aarch64.neon.ld3.v4f16 intrinsic
// pinned by the CHECK lines above.
test_vld3_f16(float16_t const * a)10045 float16x4x3_t test_vld3_f16(float16_t const *a) {
10046   return vld3_f16(a);
10047 }
10048
10049 // CHECK-LABEL: @test_vld3_f32(
10050 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
10051 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
10052 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
10053 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
10054 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
10055 // CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
10056 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
10057 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
10058 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
10059 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
10060 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
10061 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
10062 // CHECK: ret %struct.float32x2x3_t [[TMP6]]
// Verifies that vld3_f32 lowers to the @llvm.aarch64.neon.ld3.v2f32 intrinsic
// pinned by the CHECK lines above.
test_vld3_f32(float32_t const * a)10063 float32x2x3_t test_vld3_f32(float32_t const *a) {
10064   return vld3_f32(a);
10065 }
10066
10067 // CHECK-LABEL: @test_vld3_f64(
10068 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
10069 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
10070 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
10071 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
10072 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
10073 // CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
10074 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
10075 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
10076 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
10077 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
10078 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
10079 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
10080 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
// vld3_f64: 3-way de-interleaving load into three <1 x double> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld3.v1f64 IR.
test_vld3_f64(float64_t const * a)10081 float64x1x3_t test_vld3_f64(float64_t const *a) {
10082   return vld3_f64(a);
10083 }
10084
10085 // CHECK-LABEL: @test_vld3_p8(
10086 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
10087 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
10088 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
10089 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10090 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10091 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
10092 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10093 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
10094 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
10095 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
10096 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
10097 // CHECK: ret %struct.poly8x8x3_t [[TMP5]]
// vld3_p8: 3-way de-interleaving load into three <8 x i8> poly vectors; CHECK lines above pin the @llvm.aarch64.neon.ld3.v8i8 IR.
test_vld3_p8(poly8_t const * a)10098 poly8x8x3_t test_vld3_p8(poly8_t const *a) {
10099   return vld3_p8(a);
10100 }
10101
10102 // CHECK-LABEL: @test_vld3_p16(
10103 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
10104 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
10105 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
10106 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10107 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10108 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10109 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10110 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10111 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
10112 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
10113 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
10114 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
10115 // CHECK: ret %struct.poly16x4x3_t [[TMP6]]
// vld3_p16: 3-way de-interleaving load into three <4 x i16> poly vectors; CHECK lines above pin the @llvm.aarch64.neon.ld3.v4i16 IR.
test_vld3_p16(poly16_t const * a)10116 poly16x4x3_t test_vld3_p16(poly16_t const *a) {
10117   return vld3_p16(a);
10118 }
10119
10120 // CHECK-LABEL: @test_vld4q_u8(
10121 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
10122 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
10123 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10124 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10125 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10126 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10127 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10128 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
10129 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10130 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10131 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
10132 // CHECK: ret %struct.uint8x16x4_t [[TMP5]]
// vld4q_u8 (quad): 4-way de-interleaving load into four <16 x i8> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v16i8 IR.
test_vld4q_u8(uint8_t const * a)10133 uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
10134   return vld4q_u8(a);
10135 }
10136
10137 // CHECK-LABEL: @test_vld4q_u16(
10138 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
10139 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
10140 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10141 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10142 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10143 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10144 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10145 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10146 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
10147 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10148 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10149 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
10150 // CHECK: ret %struct.uint16x8x4_t [[TMP6]]
// vld4q_u16 (quad): 4-way de-interleaving load into four <8 x i16> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v8i16 IR.
test_vld4q_u16(uint16_t const * a)10151 uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
10152   return vld4q_u16(a);
10153 }
10154
10155 // CHECK-LABEL: @test_vld4q_u32(
10156 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
10157 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
10158 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
10159 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10160 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10161 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10162 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
10163 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10164 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
10165 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
10166 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10167 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
10168 // CHECK: ret %struct.uint32x4x4_t [[TMP6]]
// vld4q_u32 (quad): 4-way de-interleaving load into four <4 x i32> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v4i32 IR.
test_vld4q_u32(uint32_t const * a)10169 uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
10170   return vld4q_u32(a);
10171 }
10172
10173 // CHECK-LABEL: @test_vld4q_u64(
10174 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
10175 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
10176 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
10177 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10178 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10179 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10180 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
10181 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10182 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
10183 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
10184 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10185 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
10186 // CHECK: ret %struct.uint64x2x4_t [[TMP6]]
// vld4q_u64 (quad): 4-way de-interleaving load into four <2 x i64> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v2i64 IR.
test_vld4q_u64(uint64_t const * a)10187 uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
10188   return vld4q_u64(a);
10189 }
10190
10191 // CHECK-LABEL: @test_vld4q_s8(
10192 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
10193 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
10194 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
10195 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10196 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10197 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10198 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10199 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
10200 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
10201 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10202 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
10203 // CHECK: ret %struct.int8x16x4_t [[TMP5]]
// vld4q_s8 (quad): signed variant; same @llvm.aarch64.neon.ld4.v16i8 IR as the u8 case, checked above.
test_vld4q_s8(int8_t const * a)10204 int8x16x4_t test_vld4q_s8(int8_t const *a) {
10205   return vld4q_s8(a);
10206 }
10207
10208 // CHECK-LABEL: @test_vld4q_s16(
10209 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
10210 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
10211 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
10212 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10213 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10214 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10215 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10216 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10217 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
10218 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
10219 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10220 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
10221 // CHECK: ret %struct.int16x8x4_t [[TMP6]]
// vld4q_s16 (quad): signed variant; same @llvm.aarch64.neon.ld4.v8i16 IR as the u16 case, checked above.
test_vld4q_s16(int16_t const * a)10222 int16x8x4_t test_vld4q_s16(int16_t const *a) {
10223   return vld4q_s16(a);
10224 }
10225
10226 // CHECK-LABEL: @test_vld4q_s32(
10227 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
10228 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
10229 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
10230 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10231 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10232 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10233 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
10234 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10235 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
10236 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
10237 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10238 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
10239 // CHECK: ret %struct.int32x4x4_t [[TMP6]]
// vld4q_s32 (quad): signed variant; same @llvm.aarch64.neon.ld4.v4i32 IR as the u32 case, checked above.
test_vld4q_s32(int32_t const * a)10240 int32x4x4_t test_vld4q_s32(int32_t const *a) {
10241   return vld4q_s32(a);
10242 }
10243
10244 // CHECK-LABEL: @test_vld4q_s64(
10245 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
10246 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
10247 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
10248 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10249 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10250 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10251 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
10252 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10253 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
10254 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
10255 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10256 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
10257 // CHECK: ret %struct.int64x2x4_t [[TMP6]]
// vld4q_s64 (quad): signed variant; same @llvm.aarch64.neon.ld4.v2i64 IR as the u64 case, checked above.
test_vld4q_s64(int64_t const * a)10258 int64x2x4_t test_vld4q_s64(int64_t const *a) {
10259   return vld4q_s64(a);
10260 }
10261
10262 // CHECK-LABEL: @test_vld4q_f16(
10263 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
10264 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
10265 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
10266 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10267 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
10268 // CHECK: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* [[TMP2]])
10269 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
10270 // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
10271 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
10272 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
10273 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10274 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
10275 // CHECK: ret %struct.float16x8x4_t [[TMP6]]
// vld4q_f16 (quad): 4-way de-interleaving load into four <8 x half> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v8f16 IR.
test_vld4q_f16(float16_t const * a)10276 float16x8x4_t test_vld4q_f16(float16_t const *a) {
10277   return vld4q_f16(a);
10278 }
10279
10280 // CHECK-LABEL: @test_vld4q_f32(
10281 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
10282 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
10283 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
10284 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
10285 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
10286 // CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
10287 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
10288 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
10289 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
10290 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
10291 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10292 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
10293 // CHECK: ret %struct.float32x4x4_t [[TMP6]]
// vld4q_f32 (quad): 4-way de-interleaving load into four <4 x float> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v4f32 IR.
test_vld4q_f32(float32_t const * a)10294 float32x4x4_t test_vld4q_f32(float32_t const *a) {
10295   return vld4q_f32(a);
10296 }
10297
10298 // CHECK-LABEL: @test_vld4q_f64(
10299 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
10300 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
10301 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
10302 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
10303 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
10304 // CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
10305 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
10306 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
10307 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
10308 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
10309 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10310 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
10311 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
// vld4q_f64 (quad): 4-way de-interleaving load into four <2 x double> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v2f64 IR.
test_vld4q_f64(float64_t const * a)10312 float64x2x4_t test_vld4q_f64(float64_t const *a) {
10313   return vld4q_f64(a);
10314 }
10315
10316 // CHECK-LABEL: @test_vld4q_p8(
10317 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
10318 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
10319 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
10320 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10321 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10322 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10323 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10324 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
10325 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
10326 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10327 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
10328 // CHECK: ret %struct.poly8x16x4_t [[TMP5]]
// vld4q_p8 (quad): poly variant; same @llvm.aarch64.neon.ld4.v16i8 IR as the u8/s8 cases, checked above.
test_vld4q_p8(poly8_t const * a)10329 poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
10330   return vld4q_p8(a);
10331 }
10332
10333 // CHECK-LABEL: @test_vld4q_p16(
10334 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
10335 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
10336 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
10337 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10338 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10339 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10340 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10341 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10342 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
10343 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
10344 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10345 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
10346 // CHECK: ret %struct.poly16x8x4_t [[TMP6]]
// vld4q_p16 (quad): poly variant; same @llvm.aarch64.neon.ld4.v8i16 IR as the u16/s16 cases, checked above.
test_vld4q_p16(poly16_t const * a)10347 poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
10348   return vld4q_p16(a);
10349 }
10350
10351 // CHECK-LABEL: @test_vld4_u8(
10352 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
10353 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
10354 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10355 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10356 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10357 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10358 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10359 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
10360 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10361 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10362 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
10363 // CHECK: ret %struct.uint8x8x4_t [[TMP5]]
// vld4_u8 (64-bit D registers): 4-way de-interleaving load into four <8 x i8> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v8i8 IR.
test_vld4_u8(uint8_t const * a)10364 uint8x8x4_t test_vld4_u8(uint8_t const *a) {
10365   return vld4_u8(a);
10366 }
10367
10368 // CHECK-LABEL: @test_vld4_u16(
10369 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
10370 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
10371 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10372 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10373 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10374 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10375 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10376 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10377 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
10378 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10379 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10380 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
10381 // CHECK: ret %struct.uint16x4x4_t [[TMP6]]
// vld4_u16: 4-way de-interleaving load into four <4 x i16> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v4i16 IR.
test_vld4_u16(uint16_t const * a)10382 uint16x4x4_t test_vld4_u16(uint16_t const *a) {
10383   return vld4_u16(a);
10384 }
10385
10386 // CHECK-LABEL: @test_vld4_u32(
10387 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
10388 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
10389 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10390 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10391 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10392 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10393 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10394 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10395 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
10396 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10397 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10398 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
10399 // CHECK: ret %struct.uint32x2x4_t [[TMP6]]
// vld4_u32: 4-way de-interleaving load into four <2 x i32> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v2i32 IR.
test_vld4_u32(uint32_t const * a)10400 uint32x2x4_t test_vld4_u32(uint32_t const *a) {
10401   return vld4_u32(a);
10402 }
10403
10404 // CHECK-LABEL: @test_vld4_u64(
10405 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
10406 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
10407 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10408 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10409 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10410 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10411 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10412 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10413 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
10414 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10415 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10416 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
10417 // CHECK: ret %struct.uint64x1x4_t [[TMP6]]
// vld4_u64: 4-way load of four <1 x i64> single-element vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v1i64 IR.
test_vld4_u64(uint64_t const * a)10418 uint64x1x4_t test_vld4_u64(uint64_t const *a) {
10419   return vld4_u64(a);
10420 }
10421
10422 // CHECK-LABEL: @test_vld4_s8(
10423 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
10424 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
10425 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10426 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10427 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10428 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10429 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10430 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
10431 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10432 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10433 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
10434 // CHECK: ret %struct.int8x8x4_t [[TMP5]]
// vld4_s8: signed variant; same @llvm.aarch64.neon.ld4.v8i8 IR as the u8 case, checked above.
test_vld4_s8(int8_t const * a)10435 int8x8x4_t test_vld4_s8(int8_t const *a) {
10436   return vld4_s8(a);
10437 }
10438
10439 // CHECK-LABEL: @test_vld4_s16(
10440 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
10441 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
10442 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10443 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10444 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10445 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10446 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10447 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10448 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
10449 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10450 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10451 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
10452 // CHECK: ret %struct.int16x4x4_t [[TMP6]]
// vld4_s16: signed variant; same @llvm.aarch64.neon.ld4.v4i16 IR as the u16 case, checked above.
test_vld4_s16(int16_t const * a)10453 int16x4x4_t test_vld4_s16(int16_t const *a) {
10454   return vld4_s16(a);
10455 }
10456
10457 // CHECK-LABEL: @test_vld4_s32(
10458 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
10459 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
10460 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10461 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10462 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10463 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10464 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10465 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10466 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
10467 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10468 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10469 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
10470 // CHECK: ret %struct.int32x2x4_t [[TMP6]]
// vld4_s32: signed variant; same @llvm.aarch64.neon.ld4.v2i32 IR as the u32 case, checked above.
test_vld4_s32(int32_t const * a)10471 int32x2x4_t test_vld4_s32(int32_t const *a) {
10472   return vld4_s32(a);
10473 }
10474
10475 // CHECK-LABEL: @test_vld4_s64(
10476 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
10477 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
10478 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10479 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10480 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10481 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10482 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10483 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10484 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
10485 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10486 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10487 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
10488 // CHECK: ret %struct.int64x1x4_t [[TMP6]]
// vld4_s64: signed variant; same @llvm.aarch64.neon.ld4.v1i64 IR as the u64 case, checked above.
test_vld4_s64(int64_t const * a)10489 int64x1x4_t test_vld4_s64(int64_t const *a) {
10490   return vld4_s64(a);
10491 }
10492
10493 // CHECK-LABEL: @test_vld4_f16(
10494 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
10495 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
10496 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10497 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10498 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
10499 // CHECK: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* [[TMP2]])
10500 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
10501 // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
10502 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
10503 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10504 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10505 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
10506 // CHECK: ret %struct.float16x4x4_t [[TMP6]]
// vld4_f16: 4-way de-interleaving load into four <4 x half> vectors; CHECK lines above pin the @llvm.aarch64.neon.ld4.v4f16 IR.
test_vld4_f16(float16_t const * a)10507 float16x4x4_t test_vld4_f16(float16_t const *a) {
10508   return vld4_f16(a);
10509 }
10510
// The vld4 tests below all follow the same shape: the intrinsic must lower
// to the matching @llvm.aarch64.neon.ld4.* call, the 4-vector aggregate is
// stored into the local __RET alloca, memcpy'd into RETVAL, and returned by
// value as the %struct.*x4_t type.

// CHECK-LABEL: @test_vld4_f32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x4_t [[TMP6]]
// vld4_f32: deinterleaving load of four <2 x float> vectors.
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}

// CHECK-LABEL: @test_vld4_f64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x4_t [[TMP6]]
// vld4_f64: AArch64-only variant; four <1 x double> lanes.
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}

// CHECK-LABEL: @test_vld4_p8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x4_t [[TMP5]]
// vld4_p8: i8 element type, so no pointer round-trip through i8* is needed.
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}

// CHECK-LABEL: @test_vld4_p16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x4_t [[TMP6]]
// vld4_p16: poly16 shares the <4 x i16> lowering with the integer variants.
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}
10581
// The vst1q tests: a 128-bit single-vector store lowers to a plain IR
// `store` through a pointer bitcast. For non-i8 element types the value is
// round-tripped through <16 x i8> (two back-to-back bitcasts) before the
// store; for i8 element types the vector is stored directly.

// CHECK-LABEL: @test_vst1q_u8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: @test_vst1q_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: @test_vst1q_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: @test_vst1q_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: @test_vst1q_s8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: @test_vst1q_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: @test_vst1q_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: @test_vst1q_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: @test_vst1q_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK: store <8 x half> [[TMP3]], <8 x half>* [[TMP2]]
// CHECK: ret void
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: @test_vst1q_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
// CHECK: ret void
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: @test_vst1q_f64(
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
// CHECK: ret void
void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}

// CHECK-LABEL: @test_vst1q_p8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: @test_vst1q_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}
10715
// The 64-bit vst1 tests mirror the vst1q tests above, but with 64-bit
// vectors: the value is round-tripped through <8 x i8> (except for i8
// element types, which store directly) and written with a plain IR store.

// CHECK-LABEL: @test_vst1_u8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: @test_vst1_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: @test_vst1_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: @test_vst1_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
  vst1_u64(a, b);
}

// CHECK-LABEL: @test_vst1_s8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1_s8(int8_t *a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: @test_vst1_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1_s16(int16_t *a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: @test_vst1_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1_s32(int32_t *a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: @test_vst1_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1_s64(int64_t *a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: @test_vst1_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK: store <4 x half> [[TMP3]], <4 x half>* [[TMP2]]
// CHECK: ret void
void test_vst1_f16(float16_t *a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: @test_vst1_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
// CHECK: ret void
void test_vst1_f32(float32_t *a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: @test_vst1_f64(
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
// CHECK: ret void
void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}

// CHECK-LABEL: @test_vst1_p8(
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: @test_vst1_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}
10849
10850 // CHECK-LABEL: @test_vst2q_u8(
10851 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
10852 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
10853 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
10854 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10855 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
10856 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
10857 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10858 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10860 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10861 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10862 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10863 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10864 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10865 // CHECK: ret void
test_vst2q_u8(uint8_t * a,uint8x16x2_t b)10866 void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
10867 vst2q_u8(a, b);
10868 }
10869
10870 // CHECK-LABEL: @test_vst2q_u16(
10871 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
10872 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
10873 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
10874 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10875 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
10876 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
10877 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10878 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
10879 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10880 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10881 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10882 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10883 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10884 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10885 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10886 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10887 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10888 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10889 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10890 // CHECK: ret void
test_vst2q_u16(uint16_t * a,uint16x8x2_t b)10891 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
10892 vst2q_u16(a, b);
10893 }
10894
10895 // CHECK-LABEL: @test_vst2q_u32(
10896 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
10897 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
10898 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
10899 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10900 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
10901 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
10902 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10903 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
10904 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10905 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
10906 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
10907 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10908 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10909 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
10910 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
10911 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10912 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10913 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10914 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
10915 // CHECK: ret void
test_vst2q_u32(uint32_t * a,uint32x4x2_t b)10916 void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
10917 vst2q_u32(a, b);
10918 }
10919
10920 // CHECK-LABEL: @test_vst2q_u64(
10921 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
10922 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
10923 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
10924 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
10925 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
10926 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
10927 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10928 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
10929 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10930 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
10931 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
10932 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10933 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10934 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
10935 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
10936 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10937 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10938 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10939 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
10940 // CHECK: ret void
test_vst2q_u64(uint64_t * a,uint64x2x2_t b)10941 void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
10942 vst2q_u64(a, b);
10943 }
10944
10945 // CHECK-LABEL: @test_vst2q_s8(
10946 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
10947 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
10948 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
10949 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10950 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
10951 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
10952 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10953 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10954 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10955 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10956 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10957 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10958 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10959 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10960 // CHECK: ret void
test_vst2q_s8(int8_t * a,int8x16x2_t b)10961 void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
10962 vst2q_s8(a, b);
10963 }
10964
10965 // CHECK-LABEL: @test_vst2q_s16(
10966 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
10967 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
10968 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
10969 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10970 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
10971 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
10972 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10973 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
10974 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10975 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10976 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10977 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10978 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10979 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10980 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10981 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10982 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10983 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10984 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10985 // CHECK: ret void
test_vst2q_s16(int16_t * a,int16x8x2_t b)10986 void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
10987 vst2q_s16(a, b);
10988 }
10989
10990 // CHECK-LABEL: @test_vst2q_s32(
10991 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
10992 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
10993 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
10994 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10995 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
10996 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
10997 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10998 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
10999 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
11000 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
11001 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11002 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11003 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
11004 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11005 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11006 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11007 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11008 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11009 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
11010 // CHECK: ret void
test_vst2q_s32(int32_t * a,int32x4x2_t b)11011 void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
11012 vst2q_s32(a, b);
11013 }
11014
11015 // CHECK-LABEL: @test_vst2q_s64(
11016 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
11017 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
11018 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
11019 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
11020 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
11021 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
11022 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11023 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11024 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11025 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
11026 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11027 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11028 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11029 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11030 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11031 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11032 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11033 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11034 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
11035 // CHECK: ret void
test_vst2q_s64(int64_t * a,int64x2x2_t b)11036 void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
11037 vst2q_s64(a, b);
11038 }
11039
11040 // CHECK-LABEL: @test_vst2q_f16(
11041 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
11042 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
11043 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
11044 // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
11045 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
11046 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
11047 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11048 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
11049 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11050 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
11051 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11052 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11053 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11054 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
11055 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11056 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11057 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11058 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11059 // CHECK: call void @llvm.aarch64.neon.st2.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i8* [[TMP2]])
11060 // CHECK: ret void
// Driver: vst2q_f16 must lower to @llvm.aarch64.neon.st2.v8f16 per the CHECK lines above.
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}
11064
11065 // CHECK-LABEL: @test_vst2q_f32(
11066 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
11067 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
11068 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
11069 // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
11070 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
11071 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
11072 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11073 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
11074 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11075 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
11076 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11077 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11078 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11079 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
11080 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11081 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11082 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11083 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11084 // CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
11085 // CHECK: ret void
// Driver: vst2q_f32 must lower to @llvm.aarch64.neon.st2.v4f32 per the CHECK lines above.
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}
11089
11090 // CHECK-LABEL: @test_vst2q_f64(
11091 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
11092 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
11093 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
11094 // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
11095 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
11096 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
11097 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11098 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
11099 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11100 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
11101 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11102 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11103 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11104 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
11105 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11106 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11107 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11108 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11109 // CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
11110 // CHECK: ret void
// Driver: vst2q_f64 must lower to @llvm.aarch64.neon.st2.v2f64 per the CHECK lines above.
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}
11114
11115 // CHECK-LABEL: @test_vst2q_p8(
11116 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
11117 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
11118 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
11119 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11120 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
11121 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
11122 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11123 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11124 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11125 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11126 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11127 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11128 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11129 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11130 // CHECK: ret void
// Driver: vst2q_p8 must lower to @llvm.aarch64.neon.st2.v16i8 per the CHECK lines above
// (i8 element type: the pointer is passed straight through, no bitcasts of the lanes).
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}
11134
11135 // CHECK-LABEL: @test_vst2q_p16(
11136 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
11137 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
11138 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
11139 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11140 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
11141 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
11142 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11143 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11144 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11145 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11146 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11147 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11148 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11149 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11150 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11151 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11152 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11153 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11154 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11155 // CHECK: ret void
// Driver: vst2q_p16 must lower to @llvm.aarch64.neon.st2.v8i16 per the CHECK lines above.
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}
11159
11160 // CHECK-LABEL: @test_vst2_u8(
11161 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
11162 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
11163 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
11164 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11165 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
11166 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
11167 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11168 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
11169 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11170 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11171 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
11172 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11173 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11174 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11175 // CHECK: ret void
// Driver: vst2_u8 must lower to @llvm.aarch64.neon.st2.v8i8 per the CHECK lines above.
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
11179
11180 // CHECK-LABEL: @test_vst2_u16(
11181 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
11182 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
11183 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
11184 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
11185 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
11186 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
11187 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11188 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11189 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
11190 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
11191 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11192 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11193 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
11194 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11195 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11196 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11197 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11198 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11199 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
11200 // CHECK: ret void
// Driver: vst2_u16 must lower to @llvm.aarch64.neon.st2.v4i16 per the CHECK lines above.
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}
11204
11205 // CHECK-LABEL: @test_vst2_u32(
11206 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
11207 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
11208 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
11209 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
11210 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
11211 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
11212 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11213 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11214 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
11215 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
11216 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11217 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11218 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
11219 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11220 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11221 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11222 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11223 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11224 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
11225 // CHECK: ret void
// Driver: vst2_u32 must lower to @llvm.aarch64.neon.st2.v2i32 per the CHECK lines above.
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}
11229
11230 // CHECK-LABEL: @test_vst2_u64(
11231 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
11232 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
11233 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
11234 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
11235 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
11236 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
11237 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11238 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11239 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
11240 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
11241 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11242 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11243 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
11244 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11245 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11246 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11247 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11248 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11249 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
11250 // CHECK: ret void
// Driver: vst2_u64 must lower to @llvm.aarch64.neon.st2.v1i64 per the CHECK lines above.
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}
11254
11255 // CHECK-LABEL: @test_vst2_s8(
11256 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
11257 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
11258 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
11259 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11260 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
11261 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
11262 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11263 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
11264 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11265 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11266 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
11267 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11268 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11269 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11270 // CHECK: ret void
// Driver: vst2_s8 must lower to @llvm.aarch64.neon.st2.v8i8 per the CHECK lines above
// (same intrinsic as the u8 variant; signedness does not change the IR).
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}
11274
11275 // CHECK-LABEL: @test_vst2_s16(
11276 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
11277 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
11278 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
11279 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
11280 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
11281 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
11282 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11283 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11284 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
11285 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
11286 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11287 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11288 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
11289 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11290 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11291 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11292 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11293 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11294 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
11295 // CHECK: ret void
// Driver: vst2_s16 must lower to @llvm.aarch64.neon.st2.v4i16 per the CHECK lines above.
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}
11299
11300 // CHECK-LABEL: @test_vst2_s32(
11301 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
11302 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
11303 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
11304 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
11305 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
11306 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
11307 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11308 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11309 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
11310 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
11311 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11312 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11313 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
11314 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11315 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11316 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11317 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11318 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11319 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
11320 // CHECK: ret void
// Driver: vst2_s32 must lower to @llvm.aarch64.neon.st2.v2i32 per the CHECK lines above.
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}
11324
11325 // CHECK-LABEL: @test_vst2_s64(
11326 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
11327 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
11328 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
11329 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
11330 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
11331 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
11332 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11333 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11334 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
11335 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
11336 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11337 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11338 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
11339 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11340 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11341 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11342 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11343 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11344 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
11345 // CHECK: ret void
// Driver: vst2_s64 must lower to @llvm.aarch64.neon.st2.v1i64 per the CHECK lines above.
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}
11349
11350 // CHECK-LABEL: @test_vst2_f16(
11351 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
11352 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
11353 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
11354 // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
11355 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
11356 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
11357 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11358 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
11359 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
11360 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
11361 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
11362 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
11363 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
11364 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
11365 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
11366 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
11367 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
11368 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
11369 // CHECK: call void @llvm.aarch64.neon.st2.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i8* [[TMP2]])
11370 // CHECK: ret void
// Driver: vst2_f16 must lower to @llvm.aarch64.neon.st2.v4f16 per the CHECK lines above.
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}
11374
11375 // CHECK-LABEL: @test_vst2_f32(
11376 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
11377 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
11378 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
11379 // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
11380 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
11381 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
11382 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11383 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
11384 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
11385 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
11386 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
11387 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
11388 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
11389 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
11390 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
11391 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
11392 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
11393 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
11394 // CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
11395 // CHECK: ret void
// Driver: vst2_f32 must lower to @llvm.aarch64.neon.st2.v2f32 per the CHECK lines above.
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}
11399
11400 // CHECK-LABEL: @test_vst2_f64(
11401 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
11402 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
11403 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
11404 // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
11405 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
11406 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
11407 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11408 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
11409 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
11410 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
11411 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
11412 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
11413 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
11414 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
11415 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
11416 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
11417 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
11418 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
11419 // CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
11420 // CHECK: ret void
// Driver: vst2_f64 must lower to @llvm.aarch64.neon.st2.v1f64 per the CHECK lines above
// (AArch64-only variant; 64-bit d-register form with <1 x double> lanes).
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}
11424
11425 // CHECK-LABEL: @test_vst2_p8(
11426 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
11427 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
11428 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
11429 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11430 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
11431 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
11432 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11433 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
11434 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11435 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11436 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
11437 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11438 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11439 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11440 // CHECK: ret void
// Driver: vst2_p8 must lower to @llvm.aarch64.neon.st2.v8i8 per the CHECK lines above.
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}
11444
11445 // CHECK-LABEL: @test_vst2_p16(
11446 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
11447 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
11448 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
11449 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
11450 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
11451 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
11452 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11453 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11454 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
11455 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
11456 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11457 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11458 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
11459 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11460 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11461 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11462 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11463 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11464 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
11465 // CHECK: ret void
// Driver: vst2_p16 must lower to @llvm.aarch64.neon.st2.v4i16 per the CHECK lines above.
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}
11469
11470 // CHECK-LABEL: @test_vst3q_u8(
11471 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
11472 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
11473 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
11474 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11475 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
11476 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
11477 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11478 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11479 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11480 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11481 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11482 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11483 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11484 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11485 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11486 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11487 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11488 // CHECK: ret void
// vst3q_u8: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v16i8.
test_vst3q_u8(uint8_t * a,uint8x16x3_t b)11489 void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
11490   vst3q_u8(a, b);
11491 }
11492
11493 // CHECK-LABEL: @test_vst3q_u16(
11494 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
11495 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
11496 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
11497 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11498 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
11499 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
11500 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11501 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11502 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11503 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11504 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11505 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11506 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11507 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11508 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11509 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11510 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11511 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11512 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11513 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11514 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11515 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11516 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11517 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11518 // CHECK: ret void
// vst3q_u16: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v8i16.
test_vst3q_u16(uint16_t * a,uint16x8x3_t b)11519 void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
11520   vst3q_u16(a, b);
11521 }
11522
11523 // CHECK-LABEL: @test_vst3q_u32(
11524 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
11525 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
11526 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
11527 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
11528 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
11529 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
11530 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11531 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11532 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11533 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
11534 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11535 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11536 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11537 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11538 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11539 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11540 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11541 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
11542 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
11543 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11544 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11545 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11546 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11547 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
11548 // CHECK: ret void
// vst3q_u32: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v4i32.
test_vst3q_u32(uint32_t * a,uint32x4x3_t b)11549 void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
11550   vst3q_u32(a, b);
11551 }
11552
11553 // CHECK-LABEL: @test_vst3q_u64(
11554 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
11555 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
11556 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
11557 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
11558 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
11559 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
11560 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11561 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11562 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11563 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
11564 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11565 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11566 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11567 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11568 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11569 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11570 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11571 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
11572 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
11573 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11574 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11575 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11576 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11577 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
11578 // CHECK: ret void
// vst3q_u64: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v2i64.
test_vst3q_u64(uint64_t * a,uint64x2x3_t b)11579 void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
11580   vst3q_u64(a, b);
11581 }
11582
11583 // CHECK-LABEL: @test_vst3q_s8(
11584 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
11585 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
11586 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
11587 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11588 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
11589 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
11590 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11591 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11592 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11593 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11594 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11595 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11596 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11597 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11598 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11599 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11600 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11601 // CHECK: ret void
// vst3q_s8: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v16i8
// (same intrinsic as the unsigned variant; signedness does not change the lowering).
test_vst3q_s8(int8_t * a,int8x16x3_t b)11602 void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
11603   vst3q_s8(a, b);
11604 }
11605
11606 // CHECK-LABEL: @test_vst3q_s16(
11607 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
11608 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
11609 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
11610 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11611 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
11612 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
11613 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11614 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11615 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11616 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11617 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11618 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11619 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11620 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11621 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11622 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11623 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11624 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11625 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11626 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11627 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11628 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11629 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11630 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11631 // CHECK: ret void
// vst3q_s16: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v8i16.
test_vst3q_s16(int16_t * a,int16x8x3_t b)11632 void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
11633   vst3q_s16(a, b);
11634 }
11635
11636 // CHECK-LABEL: @test_vst3q_s32(
11637 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
11638 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
11639 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
11640 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
11641 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
11642 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
11643 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11644 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11645 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11646 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
11647 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11648 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11649 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11650 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11651 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11652 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11653 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11654 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
11655 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
11656 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11657 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11658 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11659 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11660 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
11661 // CHECK: ret void
// vst3q_s32: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v4i32.
test_vst3q_s32(int32_t * a,int32x4x3_t b)11662 void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
11663   vst3q_s32(a, b);
11664 }
11665
11666 // CHECK-LABEL: @test_vst3q_s64(
11667 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
11668 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
11669 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
11670 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
11671 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
11672 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
11673 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11674 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11675 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11676 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
11677 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11678 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11679 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11680 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11681 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11682 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11683 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11684 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
11685 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
11686 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11687 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11688 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11689 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11690 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
11691 // CHECK: ret void
// vst3q_s64: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v2i64.
test_vst3q_s64(int64_t * a,int64x2x3_t b)11692 void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
11693   vst3q_s64(a, b);
11694 }
11695
11696 // CHECK-LABEL: @test_vst3q_f16(
11697 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
11698 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
11699 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
11700 // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
11701 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
11702 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
11703 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11704 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
11705 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11706 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
11707 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11708 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11709 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11710 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
11711 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11712 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11713 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11714 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
11715 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
11716 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11717 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11718 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11719 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11720 // CHECK: call void @llvm.aarch64.neon.st3.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i8* [[TMP2]])
11721 // CHECK: ret void
// vst3q_f16: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v8f16
// (relies on the -fallow-half-arguments-and-returns RUN flag for float16_t).
test_vst3q_f16(float16_t * a,float16x8x3_t b)11722 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
11723   vst3q_f16(a, b);
11724 }
11725
11726 // CHECK-LABEL: @test_vst3q_f32(
11727 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
11728 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
11729 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
11730 // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
11731 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
11732 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
11733 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11734 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
11735 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11736 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
11737 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11738 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11739 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11740 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
11741 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11742 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11743 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11744 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
11745 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
11746 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11747 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11748 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11749 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11750 // CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
11751 // CHECK: ret void
// vst3q_f32: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v4f32.
test_vst3q_f32(float32_t * a,float32x4x3_t b)11752 void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
11753   vst3q_f32(a, b);
11754 }
11755
11756 // CHECK-LABEL: @test_vst3q_f64(
11757 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
11758 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
11759 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
11760 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
11761 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
11762 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
11763 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11764 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
11765 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11766 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
11767 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11768 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11769 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11770 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
11771 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11772 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11773 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11774 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
11775 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
11776 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11777 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11778 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11779 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11780 // CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
11781 // CHECK: ret void
// vst3q_f64: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v2f64
// (AArch64-only; float64x2x3_t has no AArch32 NEON counterpart).
test_vst3q_f64(float64_t * a,float64x2x3_t b)11782 void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
11783   vst3q_f64(a, b);
11784 }
11785
11786 // CHECK-LABEL: @test_vst3q_p8(
11787 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
11788 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
11789 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
11790 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11791 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
11792 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
11793 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11794 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11795 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11796 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11797 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11798 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11799 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11800 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11801 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11802 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11803 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11804 // CHECK: ret void
// vst3q_p8: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v16i8
// (polynomial elements use the same integer lowering as u8/s8).
test_vst3q_p8(poly8_t * a,poly8x16x3_t b)11805 void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
11806   vst3q_p8(a, b);
11807 }
11808
11809 // CHECK-LABEL: @test_vst3q_p16(
11810 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
11811 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
11812 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
11813 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11814 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
11815 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
11816 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11817 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11818 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11819 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11820 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11821 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11822 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11823 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11824 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11825 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11826 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11827 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11828 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11829 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11830 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11831 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11832 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11833 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11834 // CHECK: ret void
// vst3q_p16: per the CHECK lines above, must lower to @llvm.aarch64.neon.st3.v8i16.
test_vst3q_p16(poly16_t * a,poly16x8x3_t b)11835 void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
11836   vst3q_p16(a, b);
11837 }
11838
11839 // CHECK-LABEL: @test_vst3_u8(
11840 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
11841 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
11842 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
11843 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
11844 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
11845 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
11846 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11847 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11848 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
11849 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11850 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11851 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11852 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11853 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11854 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
11855 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
11856 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
11857 // CHECK: ret void
// vst3_u8 (64-bit D-register variant): per the CHECK lines above, must lower to
// @llvm.aarch64.neon.st3.v8i8.
test_vst3_u8(uint8_t * a,uint8x8x3_t b)11858 void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
11859   vst3_u8(a, b);
11860 }
11861
11862 // CHECK-LABEL: @test_vst3_u16(
11863 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
11864 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
11865 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
11866 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
11867 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
11868 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
11869 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11870 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11871 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11872 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
11873 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11874 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11875 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11876 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11877 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11878 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11879 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11880 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
11881 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
11882 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11883 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11884 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11885 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11886 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
11887 // CHECK: ret void
// Verify vst3_u16 lowers to a single llvm.aarch64.neon.st3.v4i16 call (see CHECK lines above).
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}
11891
11892 // CHECK-LABEL: @test_vst3_u32(
11893 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
11894 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
11895 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
11896 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
11897 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
11898 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
11899 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11900 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11901 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11902 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
11903 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11904 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11905 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11906 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11907 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11908 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11909 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11910 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
11911 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
11912 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
11913 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11914 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11915 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
11916 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
11917 // CHECK: ret void
// Verify vst3_u32 lowers to a single llvm.aarch64.neon.st3.v2i32 call (see CHECK lines above).
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}
11921
11922 // CHECK-LABEL: @test_vst3_u64(
11923 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
11924 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
11925 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
11926 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
11927 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
11928 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
11929 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11930 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11931 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11932 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
11933 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11934 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11935 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11936 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11937 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11938 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11939 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11940 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
11941 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
11942 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
11943 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11944 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11945 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
11946 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
11947 // CHECK: ret void
// Verify vst3_u64 lowers to a single llvm.aarch64.neon.st3.v1i64 call (see CHECK lines above).
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}
11951
11952 // CHECK-LABEL: @test_vst3_s8(
11953 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
11954 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
11955 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
11956 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
11957 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
11958 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
11959 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11960 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11961 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
11962 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11963 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11964 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11965 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11966 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11967 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
11968 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
11969 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
11970 // CHECK: ret void
// Verify vst3_s8 lowers to a single llvm.aarch64.neon.st3.v8i8 call (see CHECK lines above).
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}
11974
11975 // CHECK-LABEL: @test_vst3_s16(
11976 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
11977 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
11978 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
11979 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
11980 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
11981 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
11982 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11983 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11984 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11985 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
11986 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11987 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11988 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11989 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11990 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11991 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11992 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11993 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
11994 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
11995 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11996 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11997 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11998 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11999 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12000 // CHECK: ret void
// Verify vst3_s16 lowers to a single llvm.aarch64.neon.st3.v4i16 call (see CHECK lines above).
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}
12004
12005 // CHECK-LABEL: @test_vst3_s32(
12006 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
12007 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
12008 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
12009 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
12010 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
12011 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
12012 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12013 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12014 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12015 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
12016 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12017 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12018 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12019 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12020 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12021 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12022 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12023 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12024 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12025 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12026 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12027 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12028 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12029 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
12030 // CHECK: ret void
// Verify vst3_s32 lowers to a single llvm.aarch64.neon.st3.v2i32 call (see CHECK lines above).
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}
12034
12035 // CHECK-LABEL: @test_vst3_s64(
12036 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
12037 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
12038 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
12039 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
12040 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
12041 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
12042 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12043 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12044 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12045 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
12046 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12047 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12048 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12049 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12050 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12051 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12052 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12053 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12054 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12055 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12056 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12057 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12058 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12059 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
12060 // CHECK: ret void
// Verify vst3_s64 lowers to a single llvm.aarch64.neon.st3.v1i64 call (see CHECK lines above).
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}
12064
12065 // CHECK-LABEL: @test_vst3_f16(
12066 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
12067 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
12068 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
12069 // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
12070 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
12071 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
12072 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12073 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
12074 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12075 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
12076 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12077 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12078 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12079 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
12080 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
12081 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12082 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12083 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
12084 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
12085 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12086 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12087 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12088 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12089 // CHECK: call void @llvm.aarch64.neon.st3.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i8* [[TMP2]])
12090 // CHECK: ret void
// Verify vst3_f16 lowers to a single llvm.aarch64.neon.st3.v4f16 call (see CHECK lines above).
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}
12094
12095 // CHECK-LABEL: @test_vst3_f32(
12096 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
12097 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
12098 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
12099 // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
12100 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
12101 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
12102 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12103 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
12104 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12105 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
12106 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
12107 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12108 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12109 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
12110 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
12111 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12112 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12113 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
12114 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
12115 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12116 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12117 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12118 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12119 // CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
12120 // CHECK: ret void
// Verify vst3_f32 lowers to a single llvm.aarch64.neon.st3.v2f32 call (see CHECK lines above).
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}
12124
12125 // CHECK-LABEL: @test_vst3_f64(
12126 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
12127 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
12128 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
12129 // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
12130 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
12131 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
12132 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12133 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
12134 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12135 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
12136 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
12137 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12138 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12139 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
12140 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
12141 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12142 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12143 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
12144 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
12145 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12146 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12147 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12148 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12149 // CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
12150 // CHECK: ret void
// Verify vst3_f64 lowers to a single llvm.aarch64.neon.st3.v1f64 call (see CHECK lines above).
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}
12154
12155 // CHECK-LABEL: @test_vst3_p8(
12156 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
12157 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
12158 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
12159 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
12160 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
12161 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
12162 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12163 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12164 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
12165 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12166 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12167 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12168 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12169 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12170 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12171 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12172 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
12173 // CHECK: ret void
// Verify vst3_p8 lowers to a single llvm.aarch64.neon.st3.v8i8 call (see CHECK lines above).
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
12177
12178 // CHECK-LABEL: @test_vst3_p16(
12179 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
12180 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
12181 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
12182 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
12183 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
12184 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
12185 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12186 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12187 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12188 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
12189 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12190 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12191 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12192 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12193 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12194 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12195 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12196 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12197 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12198 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12199 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12200 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12201 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12202 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12203 // CHECK: ret void
// Verify vst3_p16 lowers to a single llvm.aarch64.neon.st3.v4i16 call (see CHECK lines above).
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
12207
12208 // CHECK-LABEL: @test_vst4q_u8(
12209 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
12210 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
12211 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
12212 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12213 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
12214 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
12215 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12216 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12217 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12218 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12219 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12220 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12221 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12222 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12223 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12224 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12225 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12226 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12227 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12228 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12229 // CHECK: ret void
// Verify vst4q_u8 lowers to a single llvm.aarch64.neon.st4.v16i8 call (see CHECK lines above).
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
12233
12234 // CHECK-LABEL: @test_vst4q_u16(
12235 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
12236 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
12237 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
12238 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12239 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
12240 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
12241 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12242 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12243 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12244 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12245 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12246 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12247 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12248 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12249 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12250 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12251 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12252 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12253 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12254 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12255 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12256 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12257 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12258 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12259 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12260 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12261 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12262 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12263 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12264 // CHECK: ret void
// Verify vst4q_u16 lowers to a single llvm.aarch64.neon.st4.v8i16 call (see CHECK lines above).
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}
12268
12269 // CHECK-LABEL: @test_vst4q_u32(
12270 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
12271 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
12272 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
12273 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12274 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
12275 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
12276 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12277 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12278 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12279 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12280 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12281 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12282 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12283 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12284 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12285 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12286 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12287 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12288 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12289 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12290 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12291 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12292 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12293 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12294 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12295 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12296 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12297 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12298 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12299 // CHECK: ret void
// Interleaved 4-way store of <4 x i32> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v4i32 call.
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}
12303
12304 // CHECK-LABEL: @test_vst4q_u64(
12305 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
12306 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
12307 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
12308 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12309 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
12310 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
12311 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12312 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12313 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12314 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12315 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12316 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12317 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12318 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12319 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12320 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12321 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12322 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12323 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12324 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12325 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12326 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12327 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12328 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12329 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12330 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12331 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12332 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12333 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12334 // CHECK: ret void
// Interleaved 4-way store of <2 x i64> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v2i64 call.
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}
12338
12339 // CHECK-LABEL: @test_vst4q_s8(
12340 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
12341 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
12342 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
12343 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12344 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
12345 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
12346 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12347 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12348 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12349 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12350 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12351 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12352 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12353 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12354 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12355 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12356 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12357 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12358 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12359 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12360 // CHECK: ret void
// Interleaved 4-way store of <16 x i8> vectors; the CHECK lines above verify a
// single @llvm.aarch64.neon.st4.v16i8 call (no element bitcasts needed for i8).
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}
12364
12365 // CHECK-LABEL: @test_vst4q_s16(
12366 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
12367 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
12368 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
12369 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12370 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
12371 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
12372 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12373 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12374 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12375 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12376 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12377 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12378 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12379 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12380 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12381 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12382 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12383 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12384 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12385 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12386 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12387 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12388 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12389 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12390 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12391 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12392 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12393 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12394 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12395 // CHECK: ret void
// Interleaved 4-way store of <8 x i16> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v8i16 call.
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}
12399
12400 // CHECK-LABEL: @test_vst4q_s32(
12401 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
12402 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
12403 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
12404 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12405 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
12406 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
12407 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12408 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12409 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12410 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12411 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12412 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12413 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12414 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12415 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12416 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12417 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12418 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12419 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12420 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12421 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12422 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12423 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12424 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12425 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12426 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12427 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12428 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12429 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12430 // CHECK: ret void
// Interleaved 4-way store of <4 x i32> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v4i32 call.
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}
12434
12435 // CHECK-LABEL: @test_vst4q_s64(
12436 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
12437 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
12438 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
12439 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12440 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
12441 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
12442 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12443 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12444 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12445 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12446 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12447 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12448 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12449 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12450 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12451 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12452 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12453 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12454 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12455 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12456 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12457 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12458 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12459 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12460 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12461 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12462 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12463 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12464 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12465 // CHECK: ret void
// Interleaved 4-way store of <2 x i64> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v2i64 call.
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}
12469
12470 // CHECK-LABEL: @test_vst4q_f16(
12471 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
12472 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
12473 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
12474 // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
12475 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
12476 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
12477 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12478 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
12479 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12480 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
12481 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
12482 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
12483 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12484 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
12485 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
12486 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
12487 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12488 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
12489 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
12490 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
12491 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12492 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
12493 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
12494 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
12495 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
12496 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
12497 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
12498 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
12499 // CHECK: call void @llvm.aarch64.neon.st4.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i8* [[TMP2]])
12500 // CHECK: ret void
// Interleaved 4-way store of <8 x half> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v8f16 call.
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}
12504
12505 // CHECK-LABEL: @test_vst4q_f32(
12506 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
12507 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
12508 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
12509 // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
12510 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
12511 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
12512 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12513 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
12514 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12515 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
12516 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
12517 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
12518 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12519 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
12520 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12521 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12522 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12523 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
12524 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
12525 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
12526 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12527 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
12528 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
12529 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
12530 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12531 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12532 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
12533 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
12534 // CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
12535 // CHECK: ret void
// Interleaved 4-way store of <4 x float> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v4f32 call.
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}
12539
12540 // CHECK-LABEL: @test_vst4q_f64(
12541 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
12542 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
12543 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
12544 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
12545 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
12546 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
12547 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12548 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
12549 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12550 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
12551 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12552 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12553 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12554 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
12555 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12556 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12557 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12558 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
12559 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
12560 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12561 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12562 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
12563 // CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
12564 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
12565 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12566 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12567 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12568 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
12569 // CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
12570 // CHECK: ret void
// Interleaved 4-way store of <2 x double> vectors; the CHECK lines above verify
// clang lowers this to a single @llvm.aarch64.neon.st4.v2f64 call.
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}
12574
12575 // CHECK-LABEL: @test_vst4q_p8(
12576 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
12577 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
12578 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
12579 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12580 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
12581 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
12582 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12583 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12584 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12585 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12586 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12587 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12588 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12589 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12590 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12591 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12592 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12593 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12594 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12595 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12596 // CHECK: ret void
// Interleaved 4-way store of <16 x i8> polynomial vectors; the CHECK lines
// above verify a single @llvm.aarch64.neon.st4.v16i8 call (no bitcasts for i8).
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}
12600
12601 // CHECK-LABEL: @test_vst4q_p16(
12602 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
12603 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
12604 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
12605 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12606 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
12607 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
12608 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12609 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12610 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12611 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12612 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12613 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12614 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12615 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12616 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12617 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12618 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12619 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12620 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12621 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12622 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12623 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12624 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12625 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12626 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12627 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12628 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12629 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12630 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12631 // CHECK: ret void
// Interleaved 4-way store of <8 x i16> polynomial vectors; the CHECK lines
// above verify clang lowers this to a single @llvm.aarch64.neon.st4.v8i16 call.
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}
12635
// vst4_u8: the uint8x8x4_t argument arrives coerced as [4 x <8 x i8>], is
// spilled and memcpy'd into a local, the four lanes are loaded individually,
// and the store lowers to @llvm.aarch64.neon.st4.v8i8. Byte-element vectors
// need no bitcasts of the lanes or of the destination pointer (%a is i8*).
// CHECK-LABEL: @test_vst4_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}
12661
// vst4_u16: same spill/memcpy pattern as the u8 case, but the i16* pointer is
// bitcast to i8* and each <4 x i16> lane round-trips through <8 x i8> before
// the call to @llvm.aarch64.neon.st4.v4i16.
// CHECK-LABEL: @test_vst4_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
}
12696
// vst4_u32: i32* destination bitcast to i8*, <2 x i32> lanes round-tripped
// through <8 x i8>, lowered to @llvm.aarch64.neon.st4.v2i32.
// CHECK-LABEL: @test_vst4_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
}
12731
// vst4_u64: i64* destination bitcast to i8*, <1 x i64> lanes round-tripped
// through <8 x i8>, lowered to @llvm.aarch64.neon.st4.v1i64.
// CHECK-LABEL: @test_vst4_u64(
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
}
12766
// vst4_s8: identical IR to the u8 case (signedness is a C-level distinction
// only) — byte lanes need no bitcasts; lowers to @llvm.aarch64.neon.st4.v8i8.
// CHECK-LABEL: @test_vst4_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
  vst4_s8(a, b);
}
12792
// vst4_s16: identical lowering to the u16 case — i16* bitcast to i8*,
// <4 x i16> lanes round-tripped through <8 x i8>, then
// @llvm.aarch64.neon.st4.v4i16.
// CHECK-LABEL: @test_vst4_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
}
12827
// vst4_s32: identical lowering to the u32 case — i32* bitcast to i8*,
// <2 x i32> lanes round-tripped through <8 x i8>, then
// @llvm.aarch64.neon.st4.v2i32.
// CHECK-LABEL: @test_vst4_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_s32(int32_t *a, int32x2x4_t b) {
  vst4_s32(a, b);
}
12862
// vst4_s64: identical lowering to the u64 case — i64* bitcast to i8*,
// <1 x i64> lanes round-tripped through <8 x i8>, then
// @llvm.aarch64.neon.st4.v1i64.
// CHECK-LABEL: @test_vst4_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
  vst4_s64(a, b);
}
12897
// vst4_f16: half* destination bitcast to i8*, <4 x half> lanes round-tripped
// through <8 x i8>, lowered to @llvm.aarch64.neon.st4.v4f16 (relies on the
// -fallow-half-arguments-and-returns RUN flag for the float16_t argument).
// CHECK-LABEL: @test_vst4_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK: call void @llvm.aarch64.neon.st4.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
  vst4_f16(a, b);
}
12932
// vst4_f32: float* destination bitcast to i8*, <2 x float> lanes round-tripped
// through <8 x i8>, lowered to @llvm.aarch64.neon.st4.v2f32.
// CHECK-LABEL: @test_vst4_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_f32(float32_t *a, float32x2x4_t b) {
  vst4_f32(a, b);
}
12967
// vst4_f64: double* destination bitcast to i8*, <1 x double> lanes
// round-tripped through <8 x i8>, lowered to @llvm.aarch64.neon.st4.v1f64
// (an AArch64-only intrinsic; float64 vectors do not exist on AArch32 NEON).
// CHECK-LABEL: @test_vst4_f64(
// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
// CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_f64(float64_t *a, float64x1x4_t b) {
  vst4_f64(a, b);
}
13002
// vst4_p8: polynomial byte vectors lower identically to u8/s8 — no bitcasts
// needed; the store becomes @llvm.aarch64.neon.st4.v8i8 on i8* %a directly.
// CHECK-LABEL: @test_vst4_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
  vst4_p8(a, b);
}
13028
13029 // CHECK-LABEL: @test_vst4_p16(
13030 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
13031 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
13032 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
13033 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13034 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
13035 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
13036 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13037 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13038 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13039 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13040 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13041 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13042 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13043 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13044 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13045 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13046 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13047 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13048 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13049 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13050 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13051 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13052 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13053 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13054 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13055 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13056 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13057 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13058 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13059 // CHECK: ret void
// Verify vst4_p16 lowers to @llvm.aarch64.neon.st4.v4i16 (IR pinned by the
// CHECK lines above).
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_p16(a, b);
}
13063
13064 // CHECK-LABEL: @test_vld1q_f64_x2(
13065 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
13066 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
13067 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13068 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13069 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13070 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
13071 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
13072 // CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
13073 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
13074 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13075 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13076 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
13077 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
// Verify vld1q_f64_x2 lowers to @llvm.aarch64.neon.ld1x2.v2f64 (IR pinned by
// the CHECK lines above).
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
  return vld1q_f64_x2(a);
}
13081
13082 // CHECK-LABEL: @test_vld1q_p64_x2(
13083 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
13084 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
13085 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13086 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13087 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13088 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
13089 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
13090 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
13091 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
13092 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13093 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13094 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
13095 // CHECK: ret %struct.poly64x2x2_t [[TMP6]]
// Verify vld1q_p64_x2 lowers to @llvm.aarch64.neon.ld1x2.v2i64 (IR pinned by
// the CHECK lines above).
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
  return vld1q_p64_x2(a);
}
13099
13100 // CHECK-LABEL: @test_vld1_f64_x2(
13101 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
13102 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
13103 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13104 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13105 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13106 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
13107 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
13108 // CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
13109 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
13110 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13111 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13112 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
13113 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
// Verify vld1_f64_x2 lowers to @llvm.aarch64.neon.ld1x2.v1f64 (IR pinned by
// the CHECK lines above).
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}
13117
13118 // CHECK-LABEL: @test_vld1_p64_x2(
13119 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
13120 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
13121 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13122 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13123 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13124 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
13125 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
13126 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
13127 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
13128 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13129 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13130 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
13131 // CHECK: ret %struct.poly64x1x2_t [[TMP6]]
// Verify vld1_p64_x2 lowers to @llvm.aarch64.neon.ld1x2.v1i64 (IR pinned by
// the CHECK lines above).
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}
13135
13136 // CHECK-LABEL: @test_vld1q_f64_x3(
13137 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
13138 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
13139 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13140 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13141 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13142 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
13143 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
13144 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13145 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
13146 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13147 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13148 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
13149 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
// Verify vld1q_f64_x3 lowers to @llvm.aarch64.neon.ld1x3.v2f64 (IR pinned by
// the CHECK lines above).
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}
13153
13154 // CHECK-LABEL: @test_vld1q_p64_x3(
13155 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
13156 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
13157 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13158 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13159 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13160 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
13161 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
13162 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13163 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
13164 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13165 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13166 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
13167 // CHECK: ret %struct.poly64x2x3_t [[TMP6]]
// Verify vld1q_p64_x3 lowers to @llvm.aarch64.neon.ld1x3.v2i64 (IR pinned by
// the CHECK lines above).
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
  return vld1q_p64_x3(a);
}
13171
13172 // CHECK-LABEL: @test_vld1_f64_x3(
13173 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
13174 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
13175 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13176 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13177 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13178 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
13179 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
13180 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13181 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
13182 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13183 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13184 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
13185 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
// Verify vld1_f64_x3 lowers to @llvm.aarch64.neon.ld1x3.v1f64 (IR pinned by
// the CHECK lines above).
float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
  return vld1_f64_x3(a);
}
13189
13190 // CHECK-LABEL: @test_vld1_p64_x3(
13191 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
13192 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
13193 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13194 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13195 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13196 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
13197 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
13198 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13199 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
13200 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13201 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13202 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
13203 // CHECK: ret %struct.poly64x1x3_t [[TMP6]]
// Verify vld1_p64_x3 lowers to @llvm.aarch64.neon.ld1x3.v1i64 (IR pinned by
// the CHECK lines above).
poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
  return vld1_p64_x3(a);
}
13207
13208 // CHECK-LABEL: @test_vld1q_f64_x4(
13209 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
13210 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
13211 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13212 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13213 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13214 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
13215 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
13216 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13217 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
13218 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13219 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13220 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
13221 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
// Verify vld1q_f64_x4 lowers to @llvm.aarch64.neon.ld1x4.v2f64 (IR pinned by
// the CHECK lines above).
float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
  return vld1q_f64_x4(a);
}
13225
13226 // CHECK-LABEL: @test_vld1q_p64_x4(
13227 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
13228 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
13229 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13230 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13231 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13232 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
13233 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
13234 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13235 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
13236 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13237 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13238 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
13239 // CHECK: ret %struct.poly64x2x4_t [[TMP6]]
// Verify vld1q_p64_x4 lowers to @llvm.aarch64.neon.ld1x4.v2i64 (IR pinned by
// the CHECK lines above).
poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
  return vld1q_p64_x4(a);
}
13243
13244 // CHECK-LABEL: @test_vld1_f64_x4(
13245 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
13246 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
13247 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13248 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13249 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13250 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
13251 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
13252 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13253 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
13254 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13255 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13256 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
13257 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
// Verify vld1_f64_x4 lowers to @llvm.aarch64.neon.ld1x4.v1f64 (IR pinned by
// the CHECK lines above).
float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
  return vld1_f64_x4(a);
}
13261
13262 // CHECK-LABEL: @test_vld1_p64_x4(
13263 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
13264 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
13265 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13266 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13267 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13268 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
13269 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
13270 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13271 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
13272 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13273 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13274 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
13275 // CHECK: ret %struct.poly64x1x4_t [[TMP6]]
// Verify vld1_p64_x4 lowers to @llvm.aarch64.neon.ld1x4.v1i64 (IR pinned by
// the CHECK lines above).
poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
  return vld1_p64_x4(a);
}
13279
13280 // CHECK-LABEL: @test_vst1q_f64_x2(
13281 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
13282 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
13283 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
13284 // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
13285 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
13286 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
13287 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
13288 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13289 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
13290 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
13291 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13292 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13293 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
13294 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
13295 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13296 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13297 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13298 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13299 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
13300 // CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
13301 // CHECK: ret void
// Verify vst1q_f64_x2 lowers to @llvm.aarch64.neon.st1x2.v2f64 (IR pinned by
// the CHECK lines above).
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}
13305
13306 // CHECK-LABEL: @test_vst1q_p64_x2(
13307 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
13308 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
13309 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
13310 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
13311 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
13312 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
13313 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
13314 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13315 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
13316 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
13317 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13318 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13319 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
13320 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13321 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13322 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13323 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13324 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13325 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
13326 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
13327 // CHECK: ret void
// Verify vst1q_p64_x2 lowers to @llvm.aarch64.neon.st1x2.v2i64 (IR pinned by
// the CHECK lines above).
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}
13331
13332 // CHECK-LABEL: @test_vst1_f64_x2(
13333 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
13334 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
13335 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
13336 // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
13337 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
13338 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
13339 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
13340 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13341 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
13342 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
13343 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13344 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13345 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
13346 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
13347 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13348 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13349 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13350 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13351 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
13352 // CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
13353 // CHECK: ret void
// Verify vst1_f64_x2 lowers to @llvm.aarch64.neon.st1x2.v1f64 (IR pinned by
// the CHECK lines above).
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}
13357
13358 // CHECK-LABEL: @test_vst1_p64_x2(
13359 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
13360 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
13361 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
13362 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
13363 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
13364 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
13365 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
13366 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13367 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
13368 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
13369 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13370 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13371 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
13372 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13373 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13374 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13375 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13376 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13377 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
13378 // CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
13379 // CHECK: ret void
// Verify vst1_p64_x2 lowers to @llvm.aarch64.neon.st1x2.v1i64 (IR pinned by
// the CHECK lines above).
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}
13383
13384 // CHECK-LABEL: @test_vst1q_f64_x3(
13385 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
13386 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
13387 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
13388 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
13389 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
13390 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
13391 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
13392 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13393 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13394 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
13395 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13396 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13397 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13398 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
13399 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13400 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13401 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13402 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
13403 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13404 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13405 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13406 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13407 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13408 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
13409 // CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
13410 // CHECK: ret void
// Verify vst1q_f64_x3 lowers to @llvm.aarch64.neon.st1x3.v2f64 (IR pinned by
// the CHECK lines above).
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}
13414
13415 // CHECK-LABEL: @test_vst1q_p64_x3(
13416 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
13417 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
13418 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
13419 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
13420 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
13421 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
13422 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
13423 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13424 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13425 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
13426 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13427 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13428 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13429 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13430 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13431 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13432 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13433 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13434 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13435 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13436 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13437 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13438 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13439 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
13440 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
13441 // CHECK: ret void
// Verify vst1q_p64_x3 lowers to @llvm.aarch64.neon.st1x3.v2i64 (IR pinned by
// the CHECK lines above).
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}
13445
13446 // CHECK-LABEL: @test_vst1_f64_x3(
13447 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
13448 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
13449 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
13450 // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
13451 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
13452 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
13453 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
13454 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13455 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13456 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
13457 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13458 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13459 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13460 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
13461 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13462 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13463 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13464 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
13465 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13466 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13467 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13468 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13469 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13470 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
13471 // CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
13472 // CHECK: ret void
// Verifies vst1_f64_x3 lowers to @llvm.aarch64.neon.st1x3.v1f64 (IR checked by the CHECK block above).
test_vst1_f64_x3(float64_t * a,float64x1x3_t b)13473 void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
13474   vst1_f64_x3(a, b);
13475 }
13476
13477 // CHECK-LABEL: @test_vst1_p64_x3(
13478 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
13479 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
13480 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
13481 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
13482 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
13483 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
13484 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
13485 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13486 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13487 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
13488 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13489 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13490 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13491 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13492 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13493 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13494 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13495 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13496 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13497 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13498 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13499 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13500 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13501 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
13502 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
13503 // CHECK: ret void
// Verifies vst1_p64_x3 lowers to @llvm.aarch64.neon.st1x3.v1i64 (IR checked by the CHECK block above).
test_vst1_p64_x3(poly64_t * a,poly64x1x3_t b)13504 void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
13505   vst1_p64_x3(a, b);
13506 }
13507
13508 // CHECK-LABEL: @test_vst1q_f64_x4(
13509 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
13510 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
13511 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
13512 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
13513 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
13514 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
13515 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
13516 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13517 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13518 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
13519 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13520 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13521 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13522 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
13523 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13524 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13525 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13526 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
13527 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13528 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13529 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13530 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
13531 // CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
13532 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
13533 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13534 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13535 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13536 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
13537 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
13538 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
13539 // CHECK: ret void
// Verifies vst1q_f64_x4 lowers to @llvm.aarch64.neon.st1x4.v2f64 (IR checked by the CHECK block above).
test_vst1q_f64_x4(float64_t * a,float64x2x4_t b)13540 void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
13541   vst1q_f64_x4(a, b);
13542 }
13543
13544 // CHECK-LABEL: @test_vst1q_p64_x4(
13545 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
13546 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
13547 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
13548 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
13549 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
13550 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
13551 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
13552 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13553 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13554 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
13555 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13556 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13557 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13558 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13559 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13560 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13561 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13562 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13563 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13564 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13565 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13566 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
13567 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
13568 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
13569 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13570 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13571 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13572 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
13573 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
13574 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
13575 // CHECK: ret void
// Verifies vst1q_p64_x4 lowers to @llvm.aarch64.neon.st1x4.v2i64 (IR checked by the CHECK block above).
test_vst1q_p64_x4(poly64_t * a,poly64x2x4_t b)13576 void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
13577   vst1q_p64_x4(a, b);
13578 }
13579
13580 // CHECK-LABEL: @test_vst1_f64_x4(
13581 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
13582 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
13583 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
13584 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
13585 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
13586 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
13587 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13588 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13589 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13590 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
13591 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13592 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13593 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13594 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
13595 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13596 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13597 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13598 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
13599 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13600 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13601 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13602 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
13603 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
13604 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
13605 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13606 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13607 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13608 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
13609 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
13610 // CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
13611 // CHECK: ret void
// Verifies vst1_f64_x4 lowers to @llvm.aarch64.neon.st1x4.v1f64 (IR checked by the CHECK block above).
test_vst1_f64_x4(float64_t * a,float64x1x4_t b)13612 void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
13613   vst1_f64_x4(a, b);
13614 }
13615
13616 // CHECK-LABEL: @test_vst1_p64_x4(
13617 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
13618 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
13619 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
13620 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13621 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
13622 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
13623 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13624 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13625 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13626 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13627 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13628 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13629 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13630 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13631 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13632 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13633 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13634 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13635 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13636 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13637 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13638 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13639 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13640 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13641 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13642 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13643 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13644 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13645 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
13646 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
13647 // CHECK: ret void
// Verifies vst1_p64_x4 lowers to @llvm.aarch64.neon.st1x4.v1i64 (IR checked by the CHECK block above).
test_vst1_p64_x4(poly64_t * a,poly64x1x4_t b)13648 void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
13649   vst1_p64_x4(a, b);
13650 }
13651
13652 // CHECK-LABEL: @test_vceqd_s64(
13653 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
13654 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13655 // CHECK: ret i64 [[VCEQD_I]]
// Scalar compare-equal: expects plain `icmp eq i64` + sext to an all-ones/zero i64 (no intrinsic call).
test_vceqd_s64(int64_t a,int64_t b)13656 int64_t test_vceqd_s64(int64_t a, int64_t b) {
13657   return (int64_t)vceqd_s64(a, b);
13658 }
13659
13660 // CHECK-LABEL: @test_vceqd_u64(
13661 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
13662 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13663 // CHECK: ret i64 [[VCEQD_I]]
// Unsigned variant hits the same lowering: `icmp eq i64` + sext (equality is sign-agnostic).
test_vceqd_u64(uint64_t a,uint64_t b)13664 uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
13665   return (int64_t)vceqd_u64(a, b);
13666 }
13667
13668 // CHECK-LABEL: @test_vceqzd_s64(
13669 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
13670 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
13671 // CHECK: ret i64 [[VCEQZ_I]]
// Compare-with-zero: expects `icmp eq i64 %a, 0` + sext mask.
test_vceqzd_s64(int64_t a)13672 int64_t test_vceqzd_s64(int64_t a) {
13673   return (int64_t)vceqzd_s64(a);
13674 }
13675
13676 // CHECK-LABEL: @test_vceqzd_u64(
13677 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
13678 // CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
13679 // CHECK: ret i64 [[VCEQZD_I]]
// Unsigned compare-with-zero: same `icmp eq` + sext lowering.
// NOTE(review): parameter/return are int64_t although this exercises the _u64 intrinsic —
// mirrors the autogenerated upstream test; confirm intentional before "fixing".
test_vceqzd_u64(int64_t a)13680 int64_t test_vceqzd_u64(int64_t a) {
13681   return (int64_t)vceqzd_u64(a);
13682 }
13683
13684 // CHECK-LABEL: @test_vcged_s64(
13685 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
13686 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13687 // CHECK: ret i64 [[VCEQD_I]]
// Signed >=: expects `icmp sge i64` + sext mask.
test_vcged_s64(int64_t a,int64_t b)13688 int64_t test_vcged_s64(int64_t a, int64_t b) {
13689   return (int64_t)vcged_s64(a, b);
13690 }
13691
13692 // CHECK-LABEL: @test_vcged_u64(
13693 // CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
13694 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13695 // CHECK: ret i64 [[VCEQD_I]]
// Unsigned >=: expects `icmp uge i64` + sext mask.
test_vcged_u64(uint64_t a,uint64_t b)13696 uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
13697   return (uint64_t)vcged_u64(a, b);
13698 }
13699
13700 // CHECK-LABEL: @test_vcgezd_s64(
13701 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
13702 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13703 // CHECK: ret i64 [[VCGEZ_I]]
// Signed >= 0: expects `icmp sge i64 %a, 0` + sext mask.
test_vcgezd_s64(int64_t a)13704 int64_t test_vcgezd_s64(int64_t a) {
13705   return (int64_t)vcgezd_s64(a);
13706 }
13707
13708 // CHECK-LABEL: @test_vcgtd_s64(
13709 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
13710 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13711 // CHECK: ret i64 [[VCEQD_I]]
// Signed >: expects `icmp sgt i64` + sext mask.
test_vcgtd_s64(int64_t a,int64_t b)13712 int64_t test_vcgtd_s64(int64_t a, int64_t b) {
13713   return (int64_t)vcgtd_s64(a, b);
13714 }
13715
13716 // CHECK-LABEL: @test_vcgtd_u64(
13717 // CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
13718 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13719 // CHECK: ret i64 [[VCEQD_I]]
// Unsigned >: expects `icmp ugt i64` + sext mask.
test_vcgtd_u64(uint64_t a,uint64_t b)13720 uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
13721   return (uint64_t)vcgtd_u64(a, b);
13722 }
13723
13724 // CHECK-LABEL: @test_vcgtzd_s64(
13725 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
13726 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13727 // CHECK: ret i64 [[VCGTZ_I]]
// Signed > 0: expects `icmp sgt i64 %a, 0` + sext mask.
test_vcgtzd_s64(int64_t a)13728 int64_t test_vcgtzd_s64(int64_t a) {
13729   return (int64_t)vcgtzd_s64(a);
13730 }
13731
13732 // CHECK-LABEL: @test_vcled_s64(
13733 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
13734 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13735 // CHECK: ret i64 [[VCEQD_I]]
// Signed <=: expects `icmp sle i64` + sext mask.
test_vcled_s64(int64_t a,int64_t b)13736 int64_t test_vcled_s64(int64_t a, int64_t b) {
13737   return (int64_t)vcled_s64(a, b);
13738 }
13739
13740 // CHECK-LABEL: @test_vcled_u64(
13741 // CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
13742 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13743 // CHECK: ret i64 [[VCEQD_I]]
// Unsigned <=: expects `icmp ule i64` + sext mask.
test_vcled_u64(uint64_t a,uint64_t b)13744 uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
13745   return (uint64_t)vcled_u64(a, b);
13746 }
13747
13748 // CHECK-LABEL: @test_vclezd_s64(
13749 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
13750 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13751 // CHECK: ret i64 [[VCLEZ_I]]
// Signed <= 0: expects `icmp sle i64 %a, 0` + sext mask.
test_vclezd_s64(int64_t a)13752 int64_t test_vclezd_s64(int64_t a) {
13753   return (int64_t)vclezd_s64(a);
13754 }
13755
13756 // CHECK-LABEL: @test_vcltd_s64(
13757 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
13758 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13759 // CHECK: ret i64 [[VCEQD_I]]
// Signed <: expects `icmp slt i64` + sext mask.
test_vcltd_s64(int64_t a,int64_t b)13760 int64_t test_vcltd_s64(int64_t a, int64_t b) {
13761   return (int64_t)vcltd_s64(a, b);
13762 }
13763
13764 // CHECK-LABEL: @test_vcltd_u64(
13765 // CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
13766 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13767 // CHECK: ret i64 [[VCEQD_I]]
// Unsigned <: expects `icmp ult i64` + sext mask.
test_vcltd_u64(uint64_t a,uint64_t b)13768 uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
13769   return (uint64_t)vcltd_u64(a, b);
13770 }
13771
13772 // CHECK-LABEL: @test_vcltzd_s64(
13773 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
13774 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13775 // CHECK: ret i64 [[VCLTZ_I]]
// Signed < 0: expects `icmp slt i64 %a, 0` + sext mask.
test_vcltzd_s64(int64_t a)13776 int64_t test_vcltzd_s64(int64_t a) {
13777   return (int64_t)vcltzd_s64(a);
13778 }
13779
13780 // CHECK-LABEL: @test_vtstd_s64(
13781 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
13782 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
13783 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
13784 // CHECK: ret i64 [[VTSTD_I]]
// Scalar bit-test: expects `and` + `icmp ne 0` + sext (all-ones if any common bit set).
test_vtstd_s64(int64_t a,int64_t b)13785 int64_t test_vtstd_s64(int64_t a, int64_t b) {
13786   return (int64_t)vtstd_s64(a, b);
13787 }
13788
13789 // CHECK-LABEL: @test_vtstd_u64(
13790 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
13791 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
13792 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
13793 // CHECK: ret i64 [[VTSTD_I]]
// Unsigned bit-test: identical `and` + `icmp ne 0` + sext lowering.
test_vtstd_u64(uint64_t a,uint64_t b)13794 uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
13795   return (uint64_t)vtstd_u64(a, b);
13796 }
13797
13798 // CHECK-LABEL: @test_vabsd_s64(
13799 // CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
13800 // CHECK: ret i64 [[VABSD_S64_I]]
// Scalar absolute value: expects a call to @llvm.aarch64.neon.abs.i64.
test_vabsd_s64(int64_t a)13801 int64_t test_vabsd_s64(int64_t a) {
13802   return (int64_t)vabsd_s64(a);
13803 }
13804
13805 // CHECK-LABEL: @test_vqabsb_s8(
13806 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13807 // CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
13808 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
13809 // CHECK: ret i8 [[TMP1]]
// Saturating abs on a byte: scalar is widened into lane 0 of a v8i8 for @llvm.aarch64.neon.sqabs.v8i8,
// then the result is extracted back from lane 0 (see CHECK lines above).
test_vqabsb_s8(int8_t a)13810 int8_t test_vqabsb_s8(int8_t a) {
13811   return (int8_t)vqabsb_s8(a);
13812 }
13813
13814 // CHECK-LABEL: @test_vqabsh_s16(
13815 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13816 // CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
13817 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
13818 // CHECK: ret i16 [[TMP1]]
// Saturating abs on i16 via lane 0 of @llvm.aarch64.neon.sqabs.v4i16 (insert/extract pattern).
test_vqabsh_s16(int16_t a)13819 int16_t test_vqabsh_s16(int16_t a) {
13820   return (int16_t)vqabsh_s16(a);
13821 }
13822
13823 // CHECK-LABEL: @test_vqabss_s32(
13824 // CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
13825 // CHECK: ret i32 [[VQABSS_S32_I]]
// i32 has a true scalar intrinsic: expects @llvm.aarch64.neon.sqabs.i32 directly, no vector round-trip.
test_vqabss_s32(int32_t a)13826 int32_t test_vqabss_s32(int32_t a) {
13827   return (int32_t)vqabss_s32(a);
13828 }
13829
13830 // CHECK-LABEL: @test_vqabsd_s64(
13831 // CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
13832 // CHECK: ret i64 [[VQABSD_S64_I]]
// Scalar saturating abs on i64: expects @llvm.aarch64.neon.sqabs.i64.
test_vqabsd_s64(int64_t a)13833 int64_t test_vqabsd_s64(int64_t a) {
13834   return (int64_t)vqabsd_s64(a);
13835 }
13836
13837 // CHECK-LABEL: @test_vnegd_s64(
13838 // CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a
13839 // CHECK: ret i64 [[VNEGD_I]]
// Scalar negate folds to plain IR: `sub i64 0, %a` (no intrinsic call).
test_vnegd_s64(int64_t a)13840 int64_t test_vnegd_s64(int64_t a) {
13841   return (int64_t)vnegd_s64(a);
13842 }
13843
13844 // CHECK-LABEL: @test_vqnegb_s8(
13845 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13846 // CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
13847 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
13848 // CHECK: ret i8 [[TMP1]]
// Saturating negate on a byte via lane 0 of @llvm.aarch64.neon.sqneg.v8i8 (insert/extract pattern).
test_vqnegb_s8(int8_t a)13849 int8_t test_vqnegb_s8(int8_t a) {
13850   return (int8_t)vqnegb_s8(a);
13851 }
13852
13853 // CHECK-LABEL: @test_vqnegh_s16(
13854 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13855 // CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
13856 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
13857 // CHECK: ret i16 [[TMP1]]
// Saturating negate on i16 via lane 0 of @llvm.aarch64.neon.sqneg.v4i16.
test_vqnegh_s16(int16_t a)13858 int16_t test_vqnegh_s16(int16_t a) {
13859   return (int16_t)vqnegh_s16(a);
13860 }
13861
13862 // CHECK-LABEL: @test_vqnegs_s32(
13863 // CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
13864 // CHECK: ret i32 [[VQNEGS_S32_I]]
// Scalar saturating negate on i32: expects @llvm.aarch64.neon.sqneg.i32 directly.
test_vqnegs_s32(int32_t a)13865 int32_t test_vqnegs_s32(int32_t a) {
13866   return (int32_t)vqnegs_s32(a);
13867 }
13868
13869 // CHECK-LABEL: @test_vqnegd_s64(
13870 // CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
13871 // CHECK: ret i64 [[VQNEGD_S64_I]]
// Scalar saturating negate on i64: expects @llvm.aarch64.neon.sqneg.i64.
test_vqnegd_s64(int64_t a)13872 int64_t test_vqnegd_s64(int64_t a) {
13873   return (int64_t)vqnegd_s64(a);
13874 }
13875
13876 // CHECK-LABEL: @test_vuqaddb_s8(
13877 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13878 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
13879 // CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13880 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
13881 // CHECK: ret i8 [[TMP2]]
// Signed+unsigned saturating add (byte): both scalars inserted into lane 0 for
// @llvm.aarch64.neon.suqadd.v8i8, result extracted from lane 0.
test_vuqaddb_s8(int8_t a,uint8_t b)13882 int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
13883   return (int8_t)vuqaddb_s8(a, b);
13884 }
13885
13886 // CHECK-LABEL: @test_vuqaddh_s16(
13887 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13888 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13889 // CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13890 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
13891 // CHECK: ret i16 [[TMP2]]
// Signed+unsigned saturating add (i16) via lane 0 of @llvm.aarch64.neon.suqadd.v4i16.
test_vuqaddh_s16(int16_t a,uint16_t b)13892 int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
13893   return (int16_t)vuqaddh_s16(a, b);
13894 }
13895
13896 // CHECK-LABEL: @test_vuqadds_s32(
13897 // CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
13898 // CHECK: ret i32 [[VUQADDS_S32_I]]
// Signed+unsigned saturating add (i32): true scalar intrinsic @llvm.aarch64.neon.suqadd.i32.
test_vuqadds_s32(int32_t a,uint32_t b)13899 int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
13900   return (int32_t)vuqadds_s32(a, b);
13901 }
13902
13903 // CHECK-LABEL: @test_vuqaddd_s64(
13904 // CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
13905 // CHECK: ret i64 [[VUQADDD_S64_I]]
// Signed+unsigned saturating add (i64): expects @llvm.aarch64.neon.suqadd.i64.
test_vuqaddd_s64(int64_t a,uint64_t b)13906 int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
13907   return (int64_t)vuqaddd_s64(a, b);
13908 }
13909
13910 // CHECK-LABEL: @test_vsqaddb_u8(
13911 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13912 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
13913 // CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13914 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
13915 // CHECK: ret i8 [[TMP2]]
// Unsigned+signed saturating add (byte) via lane 0 of @llvm.aarch64.neon.usqadd.v8i8.
test_vsqaddb_u8(uint8_t a,int8_t b)13916 uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
13917   return (uint8_t)vsqaddb_u8(a, b);
13918 }
13919
13920 // CHECK-LABEL: @test_vsqaddh_u16(
13921 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13922 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13923 // CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13924 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
13925 // CHECK: ret i16 [[TMP2]]
// Unsigned+signed saturating add (i16) via lane 0 of @llvm.aarch64.neon.usqadd.v4i16.
test_vsqaddh_u16(uint16_t a,int16_t b)13926 uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
13927   return (uint16_t)vsqaddh_u16(a, b);
13928 }
13929
13930 // CHECK-LABEL: @test_vsqadds_u32(
13931 // CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
13932 // CHECK: ret i32 [[VSQADDS_U32_I]]
// Unsigned+signed saturating add (i32): true scalar intrinsic @llvm.aarch64.neon.usqadd.i32.
test_vsqadds_u32(uint32_t a,int32_t b)13933 uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
13934   return (uint32_t)vsqadds_u32(a, b);
13935 }
13936
13937 // CHECK-LABEL: @test_vsqaddd_u64(
13938 // CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
13939 // CHECK: ret i64 [[VSQADDD_U64_I]]
// Unsigned+signed saturating add (i64): expects @llvm.aarch64.neon.usqadd.i64.
test_vsqaddd_u64(uint64_t a,int64_t b)13940 uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
13941   return (uint64_t)vsqaddd_u64(a, b);
13942 }
13943
13944 // CHECK-LABEL: @test_vqdmlalh_s16(
13945 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13946 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
13947 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13948 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13949 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
13950 // CHECK: ret i32 [[VQDMLXL1_I]]
// Saturating doubling multiply-accumulate: sqdmull.v4i32 on lane-0 vectors, lane 0 extracted,
// then accumulated with @llvm.aarch64.neon.sqadd.i32 (see CHECK lines above).
test_vqdmlalh_s16(int32_t a,int16_t b,int16_t c)13951 int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
13952   return (int32_t)vqdmlalh_s16(a, b, c);
13953 }
13954
13955 // CHECK-LABEL: @test_vqdmlals_s32(
13956 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13957 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
13958 // CHECK: ret i64 [[VQDMLXL1_I]]
// 32->64 saturating doubling multiply-accumulate: sqdmulls.scalar followed by sqadd.i64.
test_vqdmlals_s32(int64_t a,int32_t b,int32_t c)13959 int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
13960   return (int64_t)vqdmlals_s32(a, b, c);
13961 }
13962
13963 // CHECK-LABEL: @test_vqdmlslh_s16(
13964 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13965 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
13966 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13967 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13968 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
13969 // CHECK: ret i32 [[VQDMLXL1_I]]
// Saturating doubling multiply-subtract: sqdmull.v4i32 lane 0, then @llvm.aarch64.neon.sqsub.i32.
test_vqdmlslh_s16(int32_t a,int16_t b,int16_t c)13970 int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
13971   return (int32_t)vqdmlslh_s16(a, b, c);
13972 }
13973
13974 // CHECK-LABEL: @test_vqdmlsls_s32(
13975 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13976 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
13977 // CHECK: ret i64 [[VQDMLXL1_I]]
// 32->64 saturating doubling multiply-subtract: sqdmulls.scalar followed by sqsub.i64.
test_vqdmlsls_s32(int64_t a,int32_t b,int32_t c)13978 int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
13979   return (int64_t)vqdmlsls_s32(a, b, c);
13980 }
13981
13982 // CHECK-LABEL: @test_vqdmullh_s16(
13983 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13984 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13985 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13986 // CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
13987 // CHECK: ret i32 [[TMP2]]
// Saturating doubling multiply (i16->i32) via lane 0 of @llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmullh_s16(int16_t a,int16_t b)13988 int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
13989   return (int32_t)vqdmullh_s16(a, b);
13990 }
13991
13992 // CHECK-LABEL: @test_vqdmulls_s32(
13993 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
13994 // CHECK: ret i64 [[VQDMULLS_S32_I]]
// Saturating doubling multiply (i32->i64): true scalar intrinsic @llvm.aarch64.neon.sqdmulls.scalar.
test_vqdmulls_s32(int32_t a,int32_t b)13995 int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
13996   return (int64_t)vqdmulls_s32(a, b);
13997 }
13998
// Scalar saturating extract-unsigned-narrow (sqxtun). The 16- and 32-bit
// sources go through a vector sqxtun with lane-0 insert/extract; only the
// 64-bit source has a true scalar intrinsic.
// CHECK-LABEL: @test_vqmovunh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}

// CHECK-LABEL: @test_vqmovuns_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovuns_s32(int32_t a) {
  return (int16_t)vqmovuns_s32(a);
}

// CHECK-LABEL: @test_vqmovund_s64(
// CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
// CHECK: ret i32 [[VQMOVUND_S64_I]]
int32_t test_vqmovund_s64(int64_t a) {
  return (int32_t)vqmovund_s64(a);
}
14023
// Scalar signed saturating narrow (sqxtn); same widen/extract pattern as the
// sqxtun tests above, with a dedicated scalar intrinsic for the i64 source.
// CHECK-LABEL: @test_vqmovnh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovnh_s16(int16_t a) {
  return (int8_t)vqmovnh_s16(a);
}

// CHECK-LABEL: @test_vqmovns_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovns_s32(int32_t a) {
  return (int16_t)vqmovns_s32(a);
}

// CHECK-LABEL: @test_vqmovnd_s64(
// CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
// CHECK: ret i32 [[VQMOVND_S64_I]]
int32_t test_vqmovnd_s64(int64_t a) {
  return (int32_t)vqmovnd_s64(a);
}
14048
// Scalar unsigned saturating narrow (uqxtn). Note the parameters/returns use
// signed C types here; only the intrinsic selection (uqxtn) is under test.
// CHECK-LABEL: @test_vqmovnh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovnh_u16(int16_t a) {
  return (int8_t)vqmovnh_u16(a);
}

// CHECK-LABEL: @test_vqmovns_u32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovns_u32(int32_t a) {
  return (int16_t)vqmovns_u32(a);
}

// CHECK-LABEL: @test_vqmovnd_u64(
// CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
// CHECK: ret i32 [[VQMOVND_U64_I]]
int32_t test_vqmovnd_u64(int64_t a) {
  return (int32_t)vqmovnd_u64(a);
}
14073
// Scalar floating-point compares (equal / greater-or-equal, plus the
// compare-against-zero variants): each lowers to a plain fcmp and a sign
// extension of the i1 result to an all-ones/all-zeros integer mask.
// CHECK-LABEL: @test_vceqs_f32(
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: @test_vceqd_f64(
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// CHECK-LABEL: @test_vceqzs_f32(
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: @test_vceqzd_f64(
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}

// CHECK-LABEL: @test_vcges_f32(
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: @test_vcged_f64(
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: @test_vcgezs_f32(
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: @test_vcgezd_f64(
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}
14137
// Scalar floating-point greater-than and less-or-equal compares (and their
// compare-against-zero forms): fcmp with the matching ordered predicate,
// sign-extended to an integer mask.
// CHECK-LABEL: @test_vcgts_f32(
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: @test_vcgtd_f64(
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: @test_vcgtzs_f32(
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: @test_vcgtzd_f64(
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}

// CHECK-LABEL: @test_vcles_f32(
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: @test_vcled_f64(
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: @test_vclezs_f32(
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: @test_vclezd_f64(
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}
14201
// Scalar floating-point less-than compares (and compare-against-zero forms):
// ordered fcmp olt, sign-extended to an integer mask.
// CHECK-LABEL: @test_vclts_f32(
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: @test_vcltd_f64(
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: @test_vcltzs_f32(
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: @test_vcltzd_f64(
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}
14233
// Scalar absolute compares: vcage/vcagt map directly to facge/facgt, while
// vcale/vcalt reuse the same intrinsics with the operands swapped (%b, %a).
// CHECK-LABEL: @test_vcages_f32(
// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
// CHECK: ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: @test_vcaged_f64(
// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
// CHECK: ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: @test_vcagts_f32(
// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
// CHECK: ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: @test_vcagtd_f64(
// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
// CHECK: ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}

// CHECK-LABEL: @test_vcales_f32(
// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
// CHECK: ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: @test_vcaled_f64(
// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
// CHECK: ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// CHECK-LABEL: @test_vcalts_f32(
// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
// CHECK: ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: @test_vcaltd_f64(
// CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
// CHECK: ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}
14289
// Right-shift-by-immediate: signed lowers to ashr, unsigned to lshr. The
// unsigned shift by the full width (64) is folded to a constant 0 return.
// CHECK-LABEL: @test_vshrd_n_s64(
// CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK: ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrd_n_u64(
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {
  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: @test_vshrd_n_u64_2(
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64_2() {
  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}
14327
// Rounding right shift: implemented as a rounding left shift (srshl/urshl)
// by the negated immediate.
// CHECK-LABEL: @test_vrshrd_n_s64(
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrd_n_u64(
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}
14359
// Shift-right-and-accumulate: right shift of %b (ashr/lshr) added to %a.
// The unsigned shift by the full width (64) reduces to returning %a.
// CHECK-LABEL: @test_vsrad_n_s64(
// CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsra_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrad_n_u64(
// CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsrad_n_u64_2(
// CHECK: ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: @test_vsra_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}
14405
// Rounding shift-right-and-accumulate: rounding left shift of %b by the
// negated immediate (srshl/urshl), then added to %a.
// CHECK-LABEL: @test_vrsrad_n_s64(
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsrad_n_u64(
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}
14445
// Left-shift-by-immediate: both signed and unsigned forms lower to a plain
// shl (left shift is sign-agnostic).
// CHECK-LABEL: @test_vshld_n_s64(
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK: ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshld_n_u64(
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK: ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}
14477
// Scalar signed saturating left shift by immediate (sqshl). The 8- and
// 16-bit forms are widened to vector sqshl with the immediate in lane 0
// (remaining lanes undef); i32/i64 use the scalar intrinsic directly.
// CHECK-LABEL: @test_vqshlb_n_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_s32(
// CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_s64(
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}
14509
// Vector signed saturating left shift by immediate 0: the shift amount is
// materialized as a zeroinitializer vector operand to sqshl.
// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}
14568
// Vector unsigned saturating left shift by immediate 0 (uqshl with a
// zeroinitializer shift operand), plus the 64x1 signed variant with a
// non-zero immediate.
// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}
14636
// Scalar unsigned saturating left shift by immediate (uqshl): widened to a
// vector call with the immediate in lane 0 for i8/i16, scalar intrinsic for
// i32/i64, plus the 64x1 unsigned vector form.
// CHECK-LABEL: @test_vqshlb_n_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_u32(
// CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_u64(
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}
14677
// Signed saturating shift left unsigned (sqshlu): same widen-to-vector
// pattern for i8/i16, scalar intrinsic for i32/i64, and the 64x1 vector
// form (signed input, unsigned saturated result).
// CHECK-LABEL: @test_vqshlub_n_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshluh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshlus_n_s32(
// CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshlud_n_s64(
// CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}
14718
// Shift right and insert by immediate (vsri*_n): the scalar d-forms and the
// 64x1 vector forms all lower to llvm.aarch64.neon.vsri.v1i64 (the scalar
// variants round-trip through <1 x i64> bitcasts, as the CHECK lines show).

// CHECK-LABEL: @test_vsrid_n_s64(
// CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK: ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrid_n_u64(
// CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK: ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}
14760
// Shift left and insert by immediate (vsli*_n): mirrors the vsri tests above
// but lowers to llvm.aarch64.neon.vsli.v1i64.

// CHECK-LABEL: @test_vslid_n_s64(
// CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK: ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_u64(
// CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK: ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}
14802
// Scalar saturating shift right narrow by immediate (vqshrn{h,s,d}_n_*):
// signed forms lower to llvm.aarch64.neon.sqshrn.*, unsigned to uqshrn.*.
// The h/s forms widen the scalar into a vector lane; the d form is scalar.

// CHECK-LABEL: @test_vqshrnh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_s64(
// CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqshrnh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_u32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_u64(
// CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}
14852
// Scalar saturating rounding shift right narrow by immediate
// (vqrshrn{h,s,d}_n_*): signed forms lower to llvm.aarch64.neon.sqrshrn.*,
// unsigned forms to uqrshrn.*.

// CHECK-LABEL: @test_vqrshrnh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_s64(
// CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqrshrnh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_u32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_u64(
// CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}
14902
// Scalar signed saturating shift right unsigned narrow by immediate
// (vqshrun{h,s,d}_n_s*): lowers to llvm.aarch64.neon.sqshrun.*.

// CHECK-LABEL: @test_vqshrunh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshruns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrund_n_s64(
// CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}
14927
// Scalar signed saturating rounding shift right unsigned narrow by immediate
// (vqrshrun{h,s,d}_n_s*): lowers to llvm.aarch64.neon.sqrshrun.*.

// CHECK-LABEL: @test_vqrshrunh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshruns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrund_n_s64(
// CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRUND_N_S64]]
int32_t test_vqrshrund_n_s64(int64_t a) {
  return (int32_t)vqrshrund_n_s64(a, 32);
}
14952
// Scalar fixed-point <-> floating-point conversions with an immediate number
// of fractional bits (vcvt{s,d}_n_*): int->float lowers to vcvtfxs2fp /
// vcvtfxu2fp, float->int to vcvtfp2fxs / vcvtfp2fxu. The immediates exercise
// both ends of the valid range (1 and the full bit width).

// CHECK-LABEL: @test_vcvts_n_f32_s32(
// CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK: ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_f64_s64(
// CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK: ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_f32_u32(
// CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK: ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_f64_u64(
// CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK: ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}

// CHECK-LABEL: @test_vcvts_n_s32_f32(
// CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK: ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_s64_f64(
// CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK: ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_u32_f32(
// CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK: ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_u64_f64(
// CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK: ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
15008
// vreinterpret_s8_* tests: reinterpreting any 64-bit vector as int8x8_t is a
// single bitcast (or a no-op when the source already has i8 elements, as the
// u8 and p8 cases show).

// CHECK-LABEL: @test_vreinterpret_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}
15097
// vreinterpret_s16_* tests: one bitcast to <4 x i16>, or a plain return when
// the source already has i16 elements (u16, p16).

// CHECK-LABEL: @test_vreinterpret_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}
15186
// vreinterpret_s32_* tests: one bitcast to <2 x i32>, or a plain return for
// the same-layout u32 source.

// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}
15276
// vreinterpret_s64_* tests: one bitcast to <1 x i64>, or a plain return for
// same-layout sources (u64, p64).

// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u64(
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p64(
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}
15365
// Reinterpret casts producing uint8x8_t. Sources with the same IR type
// (s8, p8 -> <8 x i8>) are no-ops; everything else is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_u8_s8(
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p8(
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}
15454
// Reinterpret casts producing uint16x4_t. Sources with the same IR type
// (s16, p16 -> <4 x i16>) are no-ops; everything else is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}
15543
// Reinterpret casts producing uint32x2_t. The s32 source shares the IR type
// (<2 x i32>) and is a no-op; everything else is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s32(
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}
15633
// Reinterpret casts producing uint64x1_t. Sources with the same IR type
// (s64, p64 -> <1 x i64>) are no-ops; everything else is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s64(
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p64(
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}
15722
// Reinterpret casts producing float16x4_t. No source shares the IR type
// <4 x half>, so every case is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}
15813
// Reinterpret casts producing float32x2_t. No source shares the IR type
// <2 x float>, so every case is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}
15904
// Reinterpret casts producing float64x1_t (AArch64-only type). No source
// shares the IR type <1 x double>, so every case is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}
15995
// Reinterpret casts producing poly8x8_t. Sources with the same IR type
// (s8, u8 -> <8 x i8>) are no-ops; everything else is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}
16084
// Reinterpret casts producing poly16x4_t. Sources with the same IR type
// (s16, u16 -> <4 x i16>) are no-ops; everything else is a single bitcast.
// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}
16152
16153 // CHECK-LABEL: @test_vreinterpret_p16_f64(
16154 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
16155 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_f64(float64x1_t a)16156 poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
16157 return vreinterpret_p16_f64(a);
16158 }
16159
16160 // CHECK-LABEL: @test_vreinterpret_p16_p8(
16161 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
16162 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_p8(poly8x8_t a)16163 poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
16164 return vreinterpret_p16_p8(a);
16165 }
16166
16167 // CHECK-LABEL: @test_vreinterpret_p16_p64(
16168 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
16169 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_p64(poly64x1_t a)16170 poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
16171 return vreinterpret_p16_p64(a);
16172 }
16173
16174 // CHECK-LABEL: @test_vreinterpret_p64_s8(
16175 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
16176 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_s8(int8x8_t a)16177 poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
16178 return vreinterpret_p64_s8(a);
16179 }
16180
16181 // CHECK-LABEL: @test_vreinterpret_p64_s16(
16182 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
16183 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_s16(int16x4_t a)16184 poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
16185 return vreinterpret_p64_s16(a);
16186 }
16187
16188 // CHECK-LABEL: @test_vreinterpret_p64_s32(
16189 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
16190 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_s32(int32x2_t a)16191 poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
16192 return vreinterpret_p64_s32(a);
16193 }
16194
16195 // CHECK-LABEL: @test_vreinterpret_p64_s64(
16196 // CHECK: ret <1 x i64> %a
test_vreinterpret_p64_s64(int64x1_t a)16197 poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
16198 return vreinterpret_p64_s64(a);
16199 }
16200
16201 // CHECK-LABEL: @test_vreinterpret_p64_u8(
16202 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
16203 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_u8(uint8x8_t a)16204 poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
16205 return vreinterpret_p64_u8(a);
16206 }
16207
16208 // CHECK-LABEL: @test_vreinterpret_p64_u16(
16209 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
16210 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_u16(uint16x4_t a)16211 poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
16212 return vreinterpret_p64_u16(a);
16213 }
16214
16215 // CHECK-LABEL: @test_vreinterpret_p64_u32(
16216 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
16217 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_u32(uint32x2_t a)16218 poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
16219 return vreinterpret_p64_u32(a);
16220 }
16221
16222 // CHECK-LABEL: @test_vreinterpret_p64_u64(
16223 // CHECK: ret <1 x i64> %a
test_vreinterpret_p64_u64(uint64x1_t a)16224 poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
16225 return vreinterpret_p64_u64(a);
16226 }
16227
16228 // CHECK-LABEL: @test_vreinterpret_p64_f16(
16229 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
16230 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_f16(float16x4_t a)16231 poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
16232 return vreinterpret_p64_f16(a);
16233 }
16234
16235 // CHECK-LABEL: @test_vreinterpret_p64_f32(
16236 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
16237 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_f32(float32x2_t a)16238 poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
16239 return vreinterpret_p64_f32(a);
16240 }
16241
16242 // CHECK-LABEL: @test_vreinterpret_p64_f64(
16243 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
16244 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_f64(float64x1_t a)16245 poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
16246 return vreinterpret_p64_f64(a);
16247 }
16248
16249 // CHECK-LABEL: @test_vreinterpret_p64_p8(
16250 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
16251 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_p8(poly8x8_t a)16252 poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
16253 return vreinterpret_p64_p8(a);
16254 }
16255
16256 // CHECK-LABEL: @test_vreinterpret_p64_p16(
16257 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
16258 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_p64_p16(poly16x4_t a)16259 poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
16260 return vreinterpret_p64_p16(a);
16261 }
16262
16263 // CHECK-LABEL: @test_vreinterpretq_s8_s16(
16264 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16265 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s16(int16x8_t a)16266 int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
16267 return vreinterpretq_s8_s16(a);
16268 }
16269
16270 // CHECK-LABEL: @test_vreinterpretq_s8_s32(
16271 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16272 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s32(int32x4_t a)16273 int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
16274 return vreinterpretq_s8_s32(a);
16275 }
16276
16277 // CHECK-LABEL: @test_vreinterpretq_s8_s64(
16278 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16279 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s64(int64x2_t a)16280 int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
16281 return vreinterpretq_s8_s64(a);
16282 }
16283
16284 // CHECK-LABEL: @test_vreinterpretq_s8_u8(
16285 // CHECK: ret <16 x i8> %a
test_vreinterpretq_s8_u8(uint8x16_t a)16286 int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
16287 return vreinterpretq_s8_u8(a);
16288 }
16289
16290 // CHECK-LABEL: @test_vreinterpretq_s8_u16(
16291 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16292 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u16(uint16x8_t a)16293 int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
16294 return vreinterpretq_s8_u16(a);
16295 }
16296
16297 // CHECK-LABEL: @test_vreinterpretq_s8_u32(
16298 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16299 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u32(uint32x4_t a)16300 int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
16301 return vreinterpretq_s8_u32(a);
16302 }
16303
16304 // CHECK-LABEL: @test_vreinterpretq_s8_u64(
16305 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16306 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u64(uint64x2_t a)16307 int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
16308 return vreinterpretq_s8_u64(a);
16309 }
16310
16311 // CHECK-LABEL: @test_vreinterpretq_s8_f16(
16312 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
16313 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f16(float16x8_t a)16314 int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
16315 return vreinterpretq_s8_f16(a);
16316 }
16317
16318 // CHECK-LABEL: @test_vreinterpretq_s8_f32(
16319 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
16320 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f32(float32x4_t a)16321 int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
16322 return vreinterpretq_s8_f32(a);
16323 }
16324
16325 // CHECK-LABEL: @test_vreinterpretq_s8_f64(
16326 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
16327 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f64(float64x2_t a)16328 int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
16329 return vreinterpretq_s8_f64(a);
16330 }
16331
16332 // CHECK-LABEL: @test_vreinterpretq_s8_p8(
16333 // CHECK: ret <16 x i8> %a
test_vreinterpretq_s8_p8(poly8x16_t a)16334 int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
16335 return vreinterpretq_s8_p8(a);
16336 }
16337
16338 // CHECK-LABEL: @test_vreinterpretq_s8_p16(
16339 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16340 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p16(poly16x8_t a)16341 int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
16342 return vreinterpretq_s8_p16(a);
16343 }
16344
16345 // CHECK-LABEL: @test_vreinterpretq_s8_p64(
16346 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16347 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p64(poly64x2_t a)16348 int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
16349 return vreinterpretq_s8_p64(a);
16350 }
16351
16352 // CHECK-LABEL: @test_vreinterpretq_s16_s8(
16353 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16354 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s8(int8x16_t a)16355 int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
16356 return vreinterpretq_s16_s8(a);
16357 }
16358
16359 // CHECK-LABEL: @test_vreinterpretq_s16_s32(
16360 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16361 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s32(int32x4_t a)16362 int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
16363 return vreinterpretq_s16_s32(a);
16364 }
16365
16366 // CHECK-LABEL: @test_vreinterpretq_s16_s64(
16367 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16368 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s64(int64x2_t a)16369 int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
16370 return vreinterpretq_s16_s64(a);
16371 }
16372
16373 // CHECK-LABEL: @test_vreinterpretq_s16_u8(
16374 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16375 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u8(uint8x16_t a)16376 int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
16377 return vreinterpretq_s16_u8(a);
16378 }
16379
16380 // CHECK-LABEL: @test_vreinterpretq_s16_u16(
16381 // CHECK: ret <8 x i16> %a
test_vreinterpretq_s16_u16(uint16x8_t a)16382 int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
16383 return vreinterpretq_s16_u16(a);
16384 }
16385
16386 // CHECK-LABEL: @test_vreinterpretq_s16_u32(
16387 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16388 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u32(uint32x4_t a)16389 int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
16390 return vreinterpretq_s16_u32(a);
16391 }
16392
16393 // CHECK-LABEL: @test_vreinterpretq_s16_u64(
16394 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16395 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u64(uint64x2_t a)16396 int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
16397 return vreinterpretq_s16_u64(a);
16398 }
16399
16400 // CHECK-LABEL: @test_vreinterpretq_s16_f16(
16401 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
16402 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f16(float16x8_t a)16403 int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
16404 return vreinterpretq_s16_f16(a);
16405 }
16406
16407 // CHECK-LABEL: @test_vreinterpretq_s16_f32(
16408 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
16409 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f32(float32x4_t a)16410 int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
16411 return vreinterpretq_s16_f32(a);
16412 }
16413
16414 // CHECK-LABEL: @test_vreinterpretq_s16_f64(
16415 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
16416 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f64(float64x2_t a)16417 int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
16418 return vreinterpretq_s16_f64(a);
16419 }
16420
16421 // CHECK-LABEL: @test_vreinterpretq_s16_p8(
16422 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16423 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_p8(poly8x16_t a)16424 int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
16425 return vreinterpretq_s16_p8(a);
16426 }
16427
16428 // CHECK-LABEL: @test_vreinterpretq_s16_p16(
16429 // CHECK: ret <8 x i16> %a
test_vreinterpretq_s16_p16(poly16x8_t a)16430 int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
16431 return vreinterpretq_s16_p16(a);
16432 }
16433
16434 // CHECK-LABEL: @test_vreinterpretq_s16_p64(
16435 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16436 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_p64(poly64x2_t a)16437 int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
16438 return vreinterpretq_s16_p64(a);
16439 }
16440
16441 // CHECK-LABEL: @test_vreinterpretq_s32_s8(
16442 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16443 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_s8(int8x16_t a)16444 int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
16445 return vreinterpretq_s32_s8(a);
16446 }
16447
16448 // CHECK-LABEL: @test_vreinterpretq_s32_s16(
16449 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16450 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_s16(int16x8_t a)16451 int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
16452 return vreinterpretq_s32_s16(a);
16453 }
16454
16455 // CHECK-LABEL: @test_vreinterpretq_s32_s64(
16456 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16457 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_s64(int64x2_t a)16458 int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
16459 return vreinterpretq_s32_s64(a);
16460 }
16461
16462 // CHECK-LABEL: @test_vreinterpretq_s32_u8(
16463 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16464 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_u8(uint8x16_t a)16465 int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
16466 return vreinterpretq_s32_u8(a);
16467 }
16468
16469 // CHECK-LABEL: @test_vreinterpretq_s32_u16(
16470 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16471 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_u16(uint16x8_t a)16472 int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
16473 return vreinterpretq_s32_u16(a);
16474 }
16475
16476 // CHECK-LABEL: @test_vreinterpretq_s32_u32(
16477 // CHECK: ret <4 x i32> %a
test_vreinterpretq_s32_u32(uint32x4_t a)16478 int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
16479 return vreinterpretq_s32_u32(a);
16480 }
16481
16482 // CHECK-LABEL: @test_vreinterpretq_s32_u64(
16483 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16484 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_u64(uint64x2_t a)16485 int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
16486 return vreinterpretq_s32_u64(a);
16487 }
16488
16489 // CHECK-LABEL: @test_vreinterpretq_s32_f16(
16490 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
16491 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_f16(float16x8_t a)16492 int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
16493 return vreinterpretq_s32_f16(a);
16494 }
16495
16496 // CHECK-LABEL: @test_vreinterpretq_s32_f32(
16497 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
16498 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_f32(float32x4_t a)16499 int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
16500 return vreinterpretq_s32_f32(a);
16501 }
16502
16503 // CHECK-LABEL: @test_vreinterpretq_s32_f64(
16504 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
16505 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_f64(float64x2_t a)16506 int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
16507 return vreinterpretq_s32_f64(a);
16508 }
16509
16510 // CHECK-LABEL: @test_vreinterpretq_s32_p8(
16511 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16512 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_p8(poly8x16_t a)16513 int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
16514 return vreinterpretq_s32_p8(a);
16515 }
16516
16517 // CHECK-LABEL: @test_vreinterpretq_s32_p16(
16518 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16519 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_p16(poly16x8_t a)16520 int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
16521 return vreinterpretq_s32_p16(a);
16522 }
16523
16524 // CHECK-LABEL: @test_vreinterpretq_s32_p64(
16525 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16526 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_p64(poly64x2_t a)16527 int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
16528 return vreinterpretq_s32_p64(a);
16529 }
16530
16531 // CHECK-LABEL: @test_vreinterpretq_s64_s8(
16532 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16533 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_s8(int8x16_t a)16534 int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
16535 return vreinterpretq_s64_s8(a);
16536 }
16537
16538 // CHECK-LABEL: @test_vreinterpretq_s64_s16(
16539 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16540 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_s16(int16x8_t a)16541 int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
16542 return vreinterpretq_s64_s16(a);
16543 }
16544
16545 // CHECK-LABEL: @test_vreinterpretq_s64_s32(
16546 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
16547 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_s32(int32x4_t a)16548 int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
16549 return vreinterpretq_s64_s32(a);
16550 }
16551
16552 // CHECK-LABEL: @test_vreinterpretq_s64_u8(
16553 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16554 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_u8(uint8x16_t a)16555 int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
16556 return vreinterpretq_s64_u8(a);
16557 }
16558
16559 // CHECK-LABEL: @test_vreinterpretq_s64_u16(
16560 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16561 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_u16(uint16x8_t a)16562 int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
16563 return vreinterpretq_s64_u16(a);
16564 }
16565
16566 // CHECK-LABEL: @test_vreinterpretq_s64_u32(
16567 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
16568 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_u32(uint32x4_t a)16569 int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
16570 return vreinterpretq_s64_u32(a);
16571 }
16572
16573 // CHECK-LABEL: @test_vreinterpretq_s64_u64(
16574 // CHECK: ret <2 x i64> %a
test_vreinterpretq_s64_u64(uint64x2_t a)16575 int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
16576 return vreinterpretq_s64_u64(a);
16577 }
16578
16579 // CHECK-LABEL: @test_vreinterpretq_s64_f16(
16580 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
16581 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_f16(float16x8_t a)16582 int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
16583 return vreinterpretq_s64_f16(a);
16584 }
16585
16586 // CHECK-LABEL: @test_vreinterpretq_s64_f32(
16587 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
16588 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_f32(float32x4_t a)16589 int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
16590 return vreinterpretq_s64_f32(a);
16591 }
16592
16593 // CHECK-LABEL: @test_vreinterpretq_s64_f64(
16594 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
16595 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_f64(float64x2_t a)16596 int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
16597 return vreinterpretq_s64_f64(a);
16598 }
16599
16600 // CHECK-LABEL: @test_vreinterpretq_s64_p8(
16601 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16602 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_p8(poly8x16_t a)16603 int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
16604 return vreinterpretq_s64_p8(a);
16605 }
16606
16607 // CHECK-LABEL: @test_vreinterpretq_s64_p16(
16608 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16609 // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_p16(poly16x8_t a)16610 int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
16611 return vreinterpretq_s64_p16(a);
16612 }
16613
16614 // CHECK-LABEL: @test_vreinterpretq_s64_p64(
16615 // CHECK: ret <2 x i64> %a
test_vreinterpretq_s64_p64(poly64x2_t a)16616 int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
16617 return vreinterpretq_s64_p64(a);
16618 }
16619
16620 // CHECK-LABEL: @test_vreinterpretq_u8_s8(
16621 // CHECK: ret <16 x i8> %a
test_vreinterpretq_u8_s8(int8x16_t a)16622 uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
16623 return vreinterpretq_u8_s8(a);
16624 }
16625
16626 // CHECK-LABEL: @test_vreinterpretq_u8_s16(
16627 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16628 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_s16(int16x8_t a)16629 uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
16630 return vreinterpretq_u8_s16(a);
16631 }
16632
16633 // CHECK-LABEL: @test_vreinterpretq_u8_s32(
16634 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16635 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_s32(int32x4_t a)16636 uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
16637 return vreinterpretq_u8_s32(a);
16638 }
16639
16640 // CHECK-LABEL: @test_vreinterpretq_u8_s64(
16641 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16642 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_s64(int64x2_t a)16643 uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
16644 return vreinterpretq_u8_s64(a);
16645 }
16646
16647 // CHECK-LABEL: @test_vreinterpretq_u8_u16(
16648 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16649 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_u16(uint16x8_t a)16650 uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
16651 return vreinterpretq_u8_u16(a);
16652 }
16653
16654 // CHECK-LABEL: @test_vreinterpretq_u8_u32(
16655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16656 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_u32(uint32x4_t a)16657 uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
16658 return vreinterpretq_u8_u32(a);
16659 }
16660
16661 // CHECK-LABEL: @test_vreinterpretq_u8_u64(
16662 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16663 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_u64(uint64x2_t a)16664 uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
16665 return vreinterpretq_u8_u64(a);
16666 }
16667
16668 // CHECK-LABEL: @test_vreinterpretq_u8_f16(
16669 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
16670 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_f16(float16x8_t a)16671 uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
16672 return vreinterpretq_u8_f16(a);
16673 }
16674
16675 // CHECK-LABEL: @test_vreinterpretq_u8_f32(
16676 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
16677 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_f32(float32x4_t a)16678 uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
16679 return vreinterpretq_u8_f32(a);
16680 }
16681
16682 // CHECK-LABEL: @test_vreinterpretq_u8_f64(
16683 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
16684 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_f64(float64x2_t a)16685 uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
16686 return vreinterpretq_u8_f64(a);
16687 }
16688
16689 // CHECK-LABEL: @test_vreinterpretq_u8_p8(
16690 // CHECK: ret <16 x i8> %a
test_vreinterpretq_u8_p8(poly8x16_t a)16691 uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
16692 return vreinterpretq_u8_p8(a);
16693 }
16694
16695 // CHECK-LABEL: @test_vreinterpretq_u8_p16(
16696 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16697 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_p16(poly16x8_t a)16698 uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
16699 return vreinterpretq_u8_p16(a);
16700 }
16701
16702 // CHECK-LABEL: @test_vreinterpretq_u8_p64(
16703 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16704 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_p64(poly64x2_t a)16705 uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
16706 return vreinterpretq_u8_p64(a);
16707 }
16708
16709 // CHECK-LABEL: @test_vreinterpretq_u16_s8(
16710 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16711 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_s8(int8x16_t a)16712 uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
16713 return vreinterpretq_u16_s8(a);
16714 }
16715
16716 // CHECK-LABEL: @test_vreinterpretq_u16_s16(
16717 // CHECK: ret <8 x i16> %a
test_vreinterpretq_u16_s16(int16x8_t a)16718 uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
16719 return vreinterpretq_u16_s16(a);
16720 }
16721
16722 // CHECK-LABEL: @test_vreinterpretq_u16_s32(
16723 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16724 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_s32(int32x4_t a)16725 uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
16726 return vreinterpretq_u16_s32(a);
16727 }
16728
16729 // CHECK-LABEL: @test_vreinterpretq_u16_s64(
16730 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16731 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_s64(int64x2_t a)16732 uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
16733 return vreinterpretq_u16_s64(a);
16734 }
16735
16736 // CHECK-LABEL: @test_vreinterpretq_u16_u8(
16737 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16738 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_u8(uint8x16_t a)16739 uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
16740 return vreinterpretq_u16_u8(a);
16741 }
16742
16743 // CHECK-LABEL: @test_vreinterpretq_u16_u32(
16744 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16745 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_u32(uint32x4_t a)16746 uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
16747 return vreinterpretq_u16_u32(a);
16748 }
16749
16750 // CHECK-LABEL: @test_vreinterpretq_u16_u64(
16751 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16752 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_u64(uint64x2_t a)16753 uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
16754 return vreinterpretq_u16_u64(a);
16755 }
16756
16757 // CHECK-LABEL: @test_vreinterpretq_u16_f16(
16758 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
16759 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_f16(float16x8_t a)16760 uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
16761 return vreinterpretq_u16_f16(a);
16762 }
16763
16764 // CHECK-LABEL: @test_vreinterpretq_u16_f32(
16765 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
16766 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_f32(float32x4_t a)16767 uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
16768 return vreinterpretq_u16_f32(a);
16769 }
16770
16771 // CHECK-LABEL: @test_vreinterpretq_u16_f64(
16772 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
16773 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_f64(float64x2_t a)16774 uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
16775 return vreinterpretq_u16_f64(a);
16776 }
16777
16778 // CHECK-LABEL: @test_vreinterpretq_u16_p8(
16779 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16780 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_p8(poly8x16_t a)16781 uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
16782 return vreinterpretq_u16_p8(a);
16783 }
16784
16785 // CHECK-LABEL: @test_vreinterpretq_u16_p16(
16786 // CHECK: ret <8 x i16> %a
test_vreinterpretq_u16_p16(poly16x8_t a)16787 uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
16788 return vreinterpretq_u16_p16(a);
16789 }
16790
16791 // CHECK-LABEL: @test_vreinterpretq_u16_p64(
16792 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16793 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_p64(poly64x2_t a)16794 uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
16795 return vreinterpretq_u16_p64(a);
16796 }
16797
16798 // CHECK-LABEL: @test_vreinterpretq_u32_s8(
16799 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16800 // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_s8(int8x16_t a)16801 uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
16802 return vreinterpretq_u32_s8(a);
16803 }
16804
// vreinterpretq_u32_*: reinterpret each 128-bit NEON vector type as
// uint32x4_t. Pure bit reinterpretations: a source already shaped <4 x i32>
// lowers to a plain value forward (ret %a), every other type to a single IR
// bitcast — no real instructions are expected.

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}
16887
// vreinterpretq_u64_*: reinterpret each 128-bit NEON vector type as
// uint64x2_t. Sources already laid out as <2 x i64> (s64, p64) are value
// forwards (ret %a); all others lower to a single bitcast.

// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}
16976
// vreinterpretq_f16_*: reinterpret each 128-bit NEON vector type as
// float16x8_t. No source type shares the <8 x half> IR layout, so every
// case lowers to a single bitcast.

// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}
17067
// vreinterpretq_f32_*: reinterpret each 128-bit NEON vector type as
// float32x4_t. Every case is a single bitcast to <4 x float>.

// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}
17158
// vreinterpretq_f64_*: reinterpret each 128-bit NEON vector type as
// float64x2_t. Every case is a single bitcast to <2 x double>.

// CHECK-LABEL: @test_vreinterpretq_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}
17249
// vreinterpretq_p8_*: reinterpret each 128-bit NEON vector type as
// poly8x16_t. Sources already shaped <16 x i8> (s8, u8) are value forwards
// (ret %a); all others lower to a single bitcast.

// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}
17338
// vreinterpretq_p16_*: reinterpret each 128-bit NEON vector type as
// poly16x8_t. Sources already shaped <8 x i16> (s16, u16) are value
// forwards (ret %a); all others lower to a single bitcast.

// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}
17427
// vreinterpretq_p64_*: reinterpret each 128-bit NEON vector type as
// poly64x2_t. Sources already shaped <2 x i64> (s64, u64) are value
// forwards (ret %a); all others lower to a single bitcast.

// CHECK-LABEL: @test_vreinterpretq_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}
17516
// Scalar floating-point absolute difference: vabds_f32/vabdd_f64 must lower
// to the AArch64 scalar FABD intrinsic, not a generic fsub+fabs sequence.

// CHECK-LABEL: @test_vabds_f32(
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}
17530
// vuqadd(q)_s*: signed saturating accumulate of an unsigned value (SUQADD),
// lowered to llvm.aarch64.neon.suqadd.* for each element width. The 64x1
// variant additionally checks the <8 x i8> bitcasts the builtin lowering
// emits before the call.

// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT: ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT: ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT: ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT: ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT: ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT: ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}
17590
17591 // CHECK-LABEL: @test_vsqadd_u64(
17592 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17593 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
17594 // CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
17595 // CHECK: ret <1 x i64> [[VSQADD2_I]]
test_vsqadd_u64(uint64x1_t a,int64x1_t b)17596 uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
17597 return vsqadd_u64(a, b);
17598 }
17599
17600 // CHECK-LABEL: @test_vsqadd_u8(
17601 // CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
17602 // CHECK: ret <8 x i8> [[VSQADD_I]]
test_vsqadd_u8(uint8x8_t a,int8x8_t b)17603 uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
17604 return vsqadd_u8(a, b);
17605 }
17606
17607 // CHECK-LABEL: @test_vsqaddq_u8(
17608 // CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
17609 // CHECK: ret <16 x i8> [[VSQADD_I]]
test_vsqaddq_u8(uint8x16_t a,int8x16_t b)17610 uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
17611 return vsqaddq_u8(a, b);
17612 }
17613
17614 // CHECK-LABEL: @test_vsqadd_u16(
17615 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
17616 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
17617 // CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
17618 // CHECK: ret <4 x i16> [[VSQADD2_I]]
test_vsqadd_u16(uint16x4_t a,int16x4_t b)17619 uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
17620 return vsqadd_u16(a, b);
17621 }
17622
17623 // CHECK-LABEL: @test_vsqaddq_u16(
17624 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
17625 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
17626 // CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
17627 // CHECK: ret <8 x i16> [[VSQADD2_I]]
test_vsqaddq_u16(uint16x8_t a,int16x8_t b)17628 uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
17629 return vsqaddq_u16(a, b);
17630 }
17631
17632 // CHECK-LABEL: @test_vsqadd_u32(
17633 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
17634 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
17635 // CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
17636 // CHECK: ret <2 x i32> [[VSQADD2_I]]
test_vsqadd_u32(uint32x2_t a,int32x2_t b)17637 uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
17638 return vsqadd_u32(a, b);
17639 }
17640
17641 // CHECK-LABEL: @test_vsqaddq_u32(
17642 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
17643 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
17644 // CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
17645 // CHECK: ret <4 x i32> [[VSQADD2_I]]
test_vsqaddq_u32(uint32x4_t a,int32x4_t b)17646 uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
17647 return vsqaddq_u32(a, b);
17648 }
17649
17650 // CHECK-LABEL: @test_vsqaddq_u64(
17651 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17652 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
17653 // CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
17654 // CHECK: ret <2 x i64> [[VSQADD2_I]]
test_vsqaddq_u64(uint64x2_t a,int64x2_t b)17655 uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
17656 return vsqaddq_u64(a, b);
17657 }
17658
17659 // CHECK-LABEL: @test_vabs_s64(
17660 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17661 // CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
17662 // CHECK: ret <1 x i64> [[VABS1_I]]
test_vabs_s64(int64x1_t a)17663 int64x1_t test_vabs_s64(int64x1_t a) {
17664 return vabs_s64(a);
17665 }
17666
17667 // CHECK-LABEL: @test_vqabs_s64(
17668 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17669 // CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
17670 // CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
17671 // CHECK: ret <1 x i64> [[VQABS_V1_I]]
test_vqabs_s64(int64x1_t a)17672 int64x1_t test_vqabs_s64(int64x1_t a) {
17673 return vqabs_s64(a);
17674 }
17675
17676 // CHECK-LABEL: @test_vqneg_s64(
17677 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17678 // CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
17679 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
17680 // CHECK: ret <1 x i64> [[VQNEG_V1_I]]
test_vqneg_s64(int64x1_t a)17681 int64x1_t test_vqneg_s64(int64x1_t a) {
17682 return vqneg_s64(a);
17683 }
17684
17685 // CHECK-LABEL: @test_vneg_s64(
17686 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
17687 // CHECK: ret <1 x i64> [[SUB_I]]
test_vneg_s64(int64x1_t a)17688 int64x1_t test_vneg_s64(int64x1_t a) {
17689 return vneg_s64(a);
17690 }
17691
17692 // CHECK-LABEL: @test_vaddv_f32(
17693 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
17694 // CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
17695 // CHECK: ret float [[VADDV_F32_I]]
test_vaddv_f32(float32x2_t a)17696 float32_t test_vaddv_f32(float32x2_t a) {
17697 return vaddv_f32(a);
17698 }
17699
17700 // CHECK-LABEL: @test_vaddvq_f32(
17701 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
17702 // CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
17703 // CHECK: ret float [[VADDVQ_F32_I]]
test_vaddvq_f32(float32x4_t a)17704 float32_t test_vaddvq_f32(float32x4_t a) {
17705 return vaddvq_f32(a);
17706 }
17707
17708 // CHECK-LABEL: @test_vaddvq_f64(
17709 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
17710 // CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
17711 // CHECK: ret double [[VADDVQ_F64_I]]
test_vaddvq_f64(float64x2_t a)17712 float64_t test_vaddvq_f64(float64x2_t a) {
17713 return vaddvq_f64(a);
17714 }
17715
17716 // CHECK-LABEL: @test_vmaxv_f32(
17717 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
17718 // CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
17719 // CHECK: ret float [[VMAXV_F32_I]]
test_vmaxv_f32(float32x2_t a)17720 float32_t test_vmaxv_f32(float32x2_t a) {
17721 return vmaxv_f32(a);
17722 }
17723
17724 // CHECK-LABEL: @test_vmaxvq_f64(
17725 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
17726 // CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
17727 // CHECK: ret double [[VMAXVQ_F64_I]]
test_vmaxvq_f64(float64x2_t a)17728 float64_t test_vmaxvq_f64(float64x2_t a) {
17729 return vmaxvq_f64(a);
17730 }
17731
17732 // CHECK-LABEL: @test_vminv_f32(
17733 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
17734 // CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
17735 // CHECK: ret float [[VMINV_F32_I]]
test_vminv_f32(float32x2_t a)17736 float32_t test_vminv_f32(float32x2_t a) {
17737 return vminv_f32(a);
17738 }
17739
17740 // CHECK-LABEL: @test_vminvq_f64(
17741 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
17742 // CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
17743 // CHECK: ret double [[VMINVQ_F64_I]]
test_vminvq_f64(float64x2_t a)17744 float64_t test_vminvq_f64(float64x2_t a) {
17745 return vminvq_f64(a);
17746 }
17747
17748 // CHECK-LABEL: @test_vmaxnmvq_f64(
17749 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
17750 // CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
17751 // CHECK: ret double [[VMAXNMVQ_F64_I]]
test_vmaxnmvq_f64(float64x2_t a)17752 float64_t test_vmaxnmvq_f64(float64x2_t a) {
17753 return vmaxnmvq_f64(a);
17754 }
17755
17756 // CHECK-LABEL: @test_vmaxnmv_f32(
17757 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
17758 // CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
17759 // CHECK: ret float [[VMAXNMV_F32_I]]
test_vmaxnmv_f32(float32x2_t a)17760 float32_t test_vmaxnmv_f32(float32x2_t a) {
17761 return vmaxnmv_f32(a);
17762 }
17763
17764 // CHECK-LABEL: @test_vminnmvq_f64(
17765 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
17766 // CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
17767 // CHECK: ret double [[VMINNMVQ_F64_I]]
test_vminnmvq_f64(float64x2_t a)17768 float64_t test_vminnmvq_f64(float64x2_t a) {
17769 return vminnmvq_f64(a);
17770 }
17771
17772 // CHECK-LABEL: @test_vminnmv_f32(
17773 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
17774 // CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
17775 // CHECK: ret float [[VMINNMV_F32_I]]
test_vminnmv_f32(float32x2_t a)17776 float32_t test_vminnmv_f32(float32x2_t a) {
17777 return vminnmv_f32(a);
17778 }
17779
17780 // CHECK-LABEL: @test_vpaddq_s64(
17781 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17782 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
17783 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
17784 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
17785 // CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
test_vpaddq_s64(int64x2_t a,int64x2_t b)17786 int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
17787 return vpaddq_s64(a, b);
17788 }
17789
17790 // CHECK-LABEL: @test_vpaddq_u64(
17791 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17792 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
17793 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
17794 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
17795 // CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
test_vpaddq_u64(uint64x2_t a,uint64x2_t b)17796 uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
17797 return vpaddq_u64(a, b);
17798 }
17799
17800 // CHECK-LABEL: @test_vpaddd_u64(
17801 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17802 // CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
17803 // CHECK: ret i64 [[VPADDD_U64_I]]
test_vpaddd_u64(uint64x2_t a)17804 uint64_t test_vpaddd_u64(uint64x2_t a) {
17805 return vpaddd_u64(a);
17806 }
17807
17808 // CHECK-LABEL: @test_vaddvq_s64(
17809 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17810 // CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
17811 // CHECK: ret i64 [[VADDVQ_S64_I]]
test_vaddvq_s64(int64x2_t a)17812 int64_t test_vaddvq_s64(int64x2_t a) {
17813 return vaddvq_s64(a);
17814 }
17815
17816 // CHECK-LABEL: @test_vaddvq_u64(
17817 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17818 // CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
17819 // CHECK: ret i64 [[VADDVQ_U64_I]]
test_vaddvq_u64(uint64x2_t a)17820 uint64_t test_vaddvq_u64(uint64x2_t a) {
17821 return vaddvq_u64(a);
17822 }
17823
17824 // CHECK-LABEL: @test_vadd_f64(
17825 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
17826 // CHECK: ret <1 x double> [[ADD_I]]
test_vadd_f64(float64x1_t a,float64x1_t b)17827 float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
17828 return vadd_f64(a, b);
17829 }
17830
17831 // CHECK-LABEL: @test_vmul_f64(
17832 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
17833 // CHECK: ret <1 x double> [[MUL_I]]
test_vmul_f64(float64x1_t a,float64x1_t b)17834 float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
17835 return vmul_f64(a, b);
17836 }
17837
17838 // CHECK-LABEL: @test_vdiv_f64(
17839 // CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
17840 // CHECK: ret <1 x double> [[DIV_I]]
test_vdiv_f64(float64x1_t a,float64x1_t b)17841 float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
17842 return vdiv_f64(a, b);
17843 }
17844
17845 // CHECK-LABEL: @test_vmla_f64(
17846 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17847 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
17848 // CHECK: ret <1 x double> [[ADD_I]]
test_vmla_f64(float64x1_t a,float64x1_t b,float64x1_t c)17849 float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17850 return vmla_f64(a, b, c);
17851 }
17852
17853 // CHECK-LABEL: @test_vmls_f64(
17854 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17855 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
17856 // CHECK: ret <1 x double> [[SUB_I]]
test_vmls_f64(float64x1_t a,float64x1_t b,float64x1_t c)17857 float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17858 return vmls_f64(a, b, c);
17859 }
17860
17861 // CHECK-LABEL: @test_vfma_f64(
17862 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17863 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17864 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17865 // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
17866 // CHECK: ret <1 x double> [[TMP3]]
test_vfma_f64(float64x1_t a,float64x1_t b,float64x1_t c)17867 float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17868 return vfma_f64(a, b, c);
17869 }
17870
17871 // CHECK-LABEL: @test_vfms_f64(
17872 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
17873 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17874 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
17875 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17876 // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
17877 // CHECK: ret <1 x double> [[TMP3]]
test_vfms_f64(float64x1_t a,float64x1_t b,float64x1_t c)17878 float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17879 return vfms_f64(a, b, c);
17880 }
17881
17882 // CHECK-LABEL: @test_vsub_f64(
17883 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
17884 // CHECK: ret <1 x double> [[SUB_I]]
test_vsub_f64(float64x1_t a,float64x1_t b)17885 float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
17886 return vsub_f64(a, b);
17887 }
17888
17889 // CHECK-LABEL: @test_vabd_f64(
17890 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17891 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17892 // CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
17893 // CHECK: ret <1 x double> [[VABD2_I]]
test_vabd_f64(float64x1_t a,float64x1_t b)17894 float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
17895 return vabd_f64(a, b);
17896 }
17897
17898 // CHECK-LABEL: @test_vmax_f64(
17899 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17900 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17901 // CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
17902 // CHECK: ret <1 x double> [[VMAX2_I]]
test_vmax_f64(float64x1_t a,float64x1_t b)17903 float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
17904 return vmax_f64(a, b);
17905 }
17906
17907 // CHECK-LABEL: @test_vmin_f64(
17908 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17909 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17910 // CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
17911 // CHECK: ret <1 x double> [[VMIN2_I]]
test_vmin_f64(float64x1_t a,float64x1_t b)17912 float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
17913 return vmin_f64(a, b);
17914 }
17915
17916 // CHECK-LABEL: @test_vmaxnm_f64(
17917 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17918 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17919 // CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
17920 // CHECK: ret <1 x double> [[VMAXNM2_I]]
test_vmaxnm_f64(float64x1_t a,float64x1_t b)17921 float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
17922 return vmaxnm_f64(a, b);
17923 }
17924
17925 // CHECK-LABEL: @test_vminnm_f64(
17926 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17927 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17928 // CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
17929 // CHECK: ret <1 x double> [[VMINNM2_I]]
test_vminnm_f64(float64x1_t a,float64x1_t b)17930 float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
17931 return vminnm_f64(a, b);
17932 }
17933
17934 // CHECK-LABEL: @test_vabs_f64(
17935 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17936 // CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
17937 // CHECK: ret <1 x double> [[VABS1_I]]
test_vabs_f64(float64x1_t a)17938 float64x1_t test_vabs_f64(float64x1_t a) {
17939 return vabs_f64(a);
17940 }
17941
17942 // CHECK-LABEL: @test_vneg_f64(
17943 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a
17944 // CHECK: ret <1 x double> [[SUB_I]]
test_vneg_f64(float64x1_t a)17945 float64x1_t test_vneg_f64(float64x1_t a) {
17946 return vneg_f64(a);
17947 }
17948
17949 // CHECK-LABEL: @test_vcvt_s64_f64(
17950 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17951 // CHECK: [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
17952 // CHECK: ret <1 x i64> [[TMP1]]
test_vcvt_s64_f64(float64x1_t a)17953 int64x1_t test_vcvt_s64_f64(float64x1_t a) {
17954 return vcvt_s64_f64(a);
17955 }
17956
17957 // CHECK-LABEL: @test_vcvt_u64_f64(
17958 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17959 // CHECK: [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
17960 // CHECK: ret <1 x i64> [[TMP1]]
test_vcvt_u64_f64(float64x1_t a)17961 uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
17962 return vcvt_u64_f64(a);
17963 }
17964
17965 // CHECK-LABEL: @test_vcvtn_s64_f64(
17966 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17967 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
17968 // CHECK: ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_s64_f64(float64x1_t a)17969 int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
17970 return vcvtn_s64_f64(a);
17971 }
17972
17973 // CHECK-LABEL: @test_vcvtn_u64_f64(
17974 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17975 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
17976 // CHECK: ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_u64_f64(float64x1_t a)17977 uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
17978 return vcvtn_u64_f64(a);
17979 }
17980
17981 // CHECK-LABEL: @test_vcvtp_s64_f64(
17982 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17983 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
17984 // CHECK: ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_s64_f64(float64x1_t a)17985 int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
17986 return vcvtp_s64_f64(a);
17987 }
17988
17989 // CHECK-LABEL: @test_vcvtp_u64_f64(
17990 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17991 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
17992 // CHECK: ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_u64_f64(float64x1_t a)17993 uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
17994 return vcvtp_u64_f64(a);
17995 }
17996
17997 // CHECK-LABEL: @test_vcvtm_s64_f64(
17998 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17999 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
18000 // CHECK: ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_s64_f64(float64x1_t a)18001 int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
18002 return vcvtm_s64_f64(a);
18003 }
18004
18005 // CHECK-LABEL: @test_vcvtm_u64_f64(
18006 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18007 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
18008 // CHECK: ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_u64_f64(float64x1_t a)18009 uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
18010 return vcvtm_u64_f64(a);
18011 }
18012
18013 // CHECK-LABEL: @test_vcvta_s64_f64(
18014 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18015 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
18016 // CHECK: ret <1 x i64> [[VCVTA1_I]]
test_vcvta_s64_f64(float64x1_t a)18017 int64x1_t test_vcvta_s64_f64(float64x1_t a) {
18018 return vcvta_s64_f64(a);
18019 }
18020
18021 // CHECK-LABEL: @test_vcvta_u64_f64(
18022 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18023 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
18024 // CHECK: ret <1 x i64> [[VCVTA1_I]]
test_vcvta_u64_f64(float64x1_t a)18025 uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
18026 return vcvta_u64_f64(a);
18027 }
18028
18029 // CHECK-LABEL: @test_vcvt_f64_s64(
18030 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18031 // CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
18032 // CHECK: ret <1 x double> [[VCVT_I]]
test_vcvt_f64_s64(int64x1_t a)18033 float64x1_t test_vcvt_f64_s64(int64x1_t a) {
18034 return vcvt_f64_s64(a);
18035 }
18036
18037 // CHECK-LABEL: @test_vcvt_f64_u64(
18038 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18039 // CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
18040 // CHECK: ret <1 x double> [[VCVT_I]]
test_vcvt_f64_u64(uint64x1_t a)18041 float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
18042 return vcvt_f64_u64(a);
18043 }
18044
18045 // CHECK-LABEL: @test_vcvt_n_s64_f64(
18046 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18047 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
18048 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
18049 // CHECK: ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_s64_f64(float64x1_t a)18050 int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
18051 return vcvt_n_s64_f64(a, 64);
18052 }
18053
18054 // CHECK-LABEL: @test_vcvt_n_u64_f64(
18055 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18056 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
18057 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
18058 // CHECK: ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_u64_f64(float64x1_t a)18059 uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
18060 return vcvt_n_u64_f64(a, 64);
18061 }
18062
18063 // CHECK-LABEL: @test_vcvt_n_f64_s64(
18064 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18065 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18066 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
18067 // CHECK: ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_s64(int64x1_t a)18068 float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
18069 return vcvt_n_f64_s64(a, 64);
18070 }
18071
18072 // CHECK-LABEL: @test_vcvt_n_f64_u64(
18073 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18074 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18075 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
18076 // CHECK: ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_u64(uint64x1_t a)18077 float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
18078 return vcvt_n_f64_u64(a, 64);
18079 }
18080
18081 // CHECK-LABEL: @test_vrndn_f64(
18082 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18083 // CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
18084 // CHECK: ret <1 x double> [[VRNDN1_I]]
test_vrndn_f64(float64x1_t a)18085 float64x1_t test_vrndn_f64(float64x1_t a) {
18086 return vrndn_f64(a);
18087 }
18088
18089 // CHECK-LABEL: @test_vrnda_f64(
18090 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18091 // CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
18092 // CHECK: ret <1 x double> [[VRNDA1_I]]
test_vrnda_f64(float64x1_t a)18093 float64x1_t test_vrnda_f64(float64x1_t a) {
18094 return vrnda_f64(a);
18095 }
18096
18097 // CHECK-LABEL: @test_vrndp_f64(
18098 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18099 // CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
18100 // CHECK: ret <1 x double> [[VRNDP1_I]]
test_vrndp_f64(float64x1_t a)18101 float64x1_t test_vrndp_f64(float64x1_t a) {
18102 return vrndp_f64(a);
18103 }
18104
18105 // CHECK-LABEL: @test_vrndm_f64(
18106 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18107 // CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
18108 // CHECK: ret <1 x double> [[VRNDM1_I]]
test_vrndm_f64(float64x1_t a)18109 float64x1_t test_vrndm_f64(float64x1_t a) {
18110 return vrndm_f64(a);
18111 }
18112
18113 // CHECK-LABEL: @test_vrndx_f64(
18114 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18115 // CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
18116 // CHECK: ret <1 x double> [[VRNDX1_I]]
test_vrndx_f64(float64x1_t a)18117 float64x1_t test_vrndx_f64(float64x1_t a) {
18118 return vrndx_f64(a);
18119 }
18120
18121 // CHECK-LABEL: @test_vrnd_f64(
18122 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18123 // CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
18124 // CHECK: ret <1 x double> [[VRNDZ1_I]]
test_vrnd_f64(float64x1_t a)18125 float64x1_t test_vrnd_f64(float64x1_t a) {
18126 return vrnd_f64(a);
18127 }
18128
18129 // CHECK-LABEL: @test_vrndi_f64(
18130 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18131 // CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
18132 // CHECK: ret <1 x double> [[VRNDI1_I]]
test_vrndi_f64(float64x1_t a)18133 float64x1_t test_vrndi_f64(float64x1_t a) {
18134 return vrndi_f64(a);
18135 }
18136
18137 // CHECK-LABEL: @test_vrsqrte_f64(
18138 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18139 // CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
18140 // CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
test_vrsqrte_f64(float64x1_t a)18141 float64x1_t test_vrsqrte_f64(float64x1_t a) {
18142 return vrsqrte_f64(a);
18143 }
18144
18145 // CHECK-LABEL: @test_vrecpe_f64(
18146 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18147 // CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
18148 // CHECK: ret <1 x double> [[VRECPE_V1_I]]
test_vrecpe_f64(float64x1_t a)18149 float64x1_t test_vrecpe_f64(float64x1_t a) {
18150 return vrecpe_f64(a);
18151 }
18152
18153 // CHECK-LABEL: @test_vsqrt_f64(
18154 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18155 // CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
18156 // CHECK: ret <1 x double> [[VSQRT_I]]
test_vsqrt_f64(float64x1_t a)18157 float64x1_t test_vsqrt_f64(float64x1_t a) {
18158 return vsqrt_f64(a);
18159 }
18160
18161 // CHECK-LABEL: @test_vrecps_f64(
18162 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18163 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
18164 // CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
18165 // CHECK: ret <1 x double> [[VRECPS_V2_I]]
test_vrecps_f64(float64x1_t a,float64x1_t b)18166 float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
18167 return vrecps_f64(a, b);
18168 }
18169
18170 // CHECK-LABEL: @test_vrsqrts_f64(
18171 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18172 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
18173 // CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
18174 // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
18175 // CHECK: ret <1 x double> [[VRSQRTS_V2_I]]
test_vrsqrts_f64(float64x1_t a,float64x1_t b)18176 float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
18177 return vrsqrts_f64(a, b);
18178 }
18179
18180 // CHECK-LABEL: @test_vminv_s32(
18181 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18182 // CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
18183 // CHECK: ret i32 [[VMINV_S32_I]]
test_vminv_s32(int32x2_t a)18184 int32_t test_vminv_s32(int32x2_t a) {
18185 return vminv_s32(a);
18186 }
18187
18188 // CHECK-LABEL: @test_vminv_u32(
18189 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18190 // CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
18191 // CHECK: ret i32 [[VMINV_U32_I]]
test_vminv_u32(uint32x2_t a)18192 uint32_t test_vminv_u32(uint32x2_t a) {
18193 return vminv_u32(a);
18194 }
18195
18196 // CHECK-LABEL: @test_vmaxv_s32(
18197 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18198 // CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
18199 // CHECK: ret i32 [[VMAXV_S32_I]]
test_vmaxv_s32(int32x2_t a)18200 int32_t test_vmaxv_s32(int32x2_t a) {
18201 return vmaxv_s32(a);
18202 }
18203
18204 // CHECK-LABEL: @test_vmaxv_u32(
18205 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18206 // CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
18207 // CHECK: ret i32 [[VMAXV_U32_I]]
test_vmaxv_u32(uint32x2_t a)18208 uint32_t test_vmaxv_u32(uint32x2_t a) {
18209 return vmaxv_u32(a);
18210 }
18211
18212 // CHECK-LABEL: @test_vaddv_s32(
18213 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18214 // CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
18215 // CHECK: ret i32 [[VADDV_S32_I]]
test_vaddv_s32(int32x2_t a)18216 int32_t test_vaddv_s32(int32x2_t a) {
18217 return vaddv_s32(a);
18218 }
18219
18220 // CHECK-LABEL: @test_vaddv_u32(
18221 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18222 // CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
18223 // CHECK: ret i32 [[VADDV_U32_I]]
test_vaddv_u32(uint32x2_t a)18224 uint32_t test_vaddv_u32(uint32x2_t a) {
18225 return vaddv_u32(a);
18226 }
18227
18228 // CHECK-LABEL: @test_vaddlv_s32(
18229 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18230 // CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
18231 // CHECK: ret i64 [[VADDLV_S32_I]]
test_vaddlv_s32(int32x2_t a)18232 int64_t test_vaddlv_s32(int32x2_t a) {
18233 return vaddlv_s32(a);
18234 }
18235
18236 // CHECK-LABEL: @test_vaddlv_u32(
18237 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18238 // CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
18239 // CHECK: ret i64 [[VADDLV_U32_I]]
test_vaddlv_u32(uint32x2_t a)18240 uint64_t test_vaddlv_u32(uint32x2_t a) {
18241 return vaddlv_u32(a);
18242 }
18243