// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:     -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

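// The RUN pipeline compiles this file to LLVM IR at -O0 (with `optnone`
// suppressed by -disable-O0-optnone so the IR stays amenable to later
// passes), pipes the output through `opt -mem2reg` to promote stack slots
// to SSA values, and then matches the cleaned-up IR against the CHECK
// lines below with FileCheck.
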
// CHECK-LABEL: @test_vadd_s8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f64(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}

// CHECK-LABEL: @test_vsub_s8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vsub_s16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vsub_s32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vsub_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: @test_vsub_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vsub_u16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vsub_u32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}

// CHECK-LABEL: @test_vmul_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: @test_vmul_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: @test_vmul_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: @test_vmul_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}

// CHECK-LABEL: @test_vmul_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: @test_vmul_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: @test_vmul_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

// CHECK-LABEL: @test_vmul_p8(
// CHECK:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  return vmul_p8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_p8(
// CHECK:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vmulq_p8(v1, v2);
}

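// vmul_p8/vmulq_p8 are the one multiply above that cannot lower to a plain
// IR `mul`: poly8 multiplication is carryless (a polynomial product over
// GF(2)), so partial products combine with XOR rather than ADD, and clang
// emits the @llvm.aarch64.neon.pmul intrinsic instead. A minimal one-lane
// scalar sketch, for illustration only (static inline, so it is neither
// emitted into the IR nor matched by any CHECK line):
static inline uint8_t poly8_mul_model(uint8_t a, uint8_t b) {
  uint8_t acc = 0;
  for (int i = 0; i < 8; ++i)
    if (b & (1u << i))
      acc ^= (uint8_t)(a << i); // XOR-accumulate the shifted partial product
  return acc;                   // PMUL keeps the low 8 bits of the product
}
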
// CHECK-LABEL: @test_vmla_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}

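// vmla/vmls need no target intrinsic: as the CHECK lines above show, each
// expands to a separate (f)mul followed by an (f)add or (f)sub, i.e. per
// lane vmla(a, b, c) == a + b * c and vmls(a, b, c) == a - b * c. For the
// float variants this is an unfused multiply-add; the fused forms are the
// vfma/vfms tests below. A one-lane sketch, for illustration only:
static inline int16_t mla_s16_lane_model(int16_t a, int16_t b, int16_t c) {
  return (int16_t)(a + b * c); // vmls_s16 would compute a - b * c
}
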
// CHECK-LABEL: @test_vfma_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfma_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
// CHECK:   ret <2 x double> [[TMP3]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
// CHECK:   ret <2 x double> [[TMP3]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}

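// The vfma/vfms expansions above show the fused lowering: vfma(a, b, c)
// maps directly to @llvm.fma(b, c, a) (one rounding step, unlike the
// unfused vmla), and vfms first negates b with an `fsub` from -0.0 (so
// the negation is exact even for signed zeros) and then reuses the same
// @llvm.fma call. Per lane, roughly in C terms:
//
//   vfma(a, b, c) == fma( b, c, a)
//   vfms(a, b, c) == fma(-b, c, a)
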
// CHECK-LABEL: @test_vdivq_f64(
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}

// CHECK-LABEL: @test_vdivq_f32(
// CHECK:   [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}

// CHECK-LABEL: @test_vdiv_f32(
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}

// CHECK-LABEL: @test_vaba_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabd_s8(
// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK:   ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s8(
// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK:   ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK:   ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}

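// vabd lowers to the @llvm.aarch64.neon.sabd/uabd intrinsics (signed or
// unsigned absolute difference; fabd for floats), and vaba is the same
// intrinsic followed by a plain add, as the CHECK lines above show. A
// one-lane unsigned model, for illustration only:
static inline uint8_t uabd8_model(uint8_t a, uint8_t b) {
  return a > b ? (uint8_t)(a - b) : (uint8_t)(b - a); // |a - b|, no overflow
}
// vaba(acc, a, b) then accumulates: acc + uabd8_model(a, b) per lane.
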
// CHECK-LABEL: @test_vbsl_s8(
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vbsl_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <1 x i64> [[VBSL5_I]]
int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[TMP0]], [[VBSL1_I]]
// CHECK:   [[TMP4:%.*]] = xor <2 x i32> [[TMP0]], <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
// CHECK:   ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}

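// vbsl is a pure bitwise select, so no target intrinsic is needed: every
// variant above expands to (mask & a) | (~mask & b), with float and poly
// vectors first bitcast to same-width integer vectors. One byte lane, for
// illustration only:
static inline uint8_t bsl8_model(uint8_t mask, uint8_t a, uint8_t b) {
  return (uint8_t)((mask & a) | (~mask & b)); // each set mask bit picks a
}
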
// CHECK-LABEL: @test_vbslq_s8(
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i32> [[VBSL5_I]]
uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vbslq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
// CHECK:   ret <2 x double> [[TMP4]]
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}

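// The vbsl* lowering checked above is the classic bitwise select:
// (mask & a) | (~mask & b), applied to the raw bits of each lane. A minimal
// scalar sketch of the same semantics (illustrative only; the helper name is
// ours, not part of the checked test, and being an unused static inline it
// emits no IR and leaves the CHECK lines unaffected):
static inline uint32_t bsl_scalar(uint32_t mask, uint32_t a, uint32_t b) {
  return (mask & a) | (~mask & b);
}
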
// CHECK-LABEL: @test_vrecps_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK:   ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
  return vrecps_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrecpsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x double> [[VRECPSQ_V2_I]]
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}

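// vrecps*(a, b) computes (2.0 - a*b), the Newton-Raphson correction factor
// for a reciprocal estimate. A hedged usage sketch pairing it with
// vrecpe_f32 (illustrative only; the helper is ours and, being an unused
// static inline, emits no IR):
static inline float32x2_t recip_newton(float32x2_t d) {
  float32x2_t x = vrecpe_f32(d);          // coarse initial estimate of 1/d
  x = vmul_f32(x, vrecps_f32(d, x));      // x *= (2 - d*x): one refinement
  return vmul_f32(x, vrecps_f32(d, x));   // second step for near-full precision
}
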
// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x double> [[VRSQRTSQ_V2_I]]
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}

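// vrsqrts*(a, b) computes (3.0 - a*b) / 2.0, the correction factor for a
// reciprocal-square-root estimate. A hedged usage sketch pairing it with
// vrsqrte_f32 (illustrative only; unused static inline, so no IR is emitted):
static inline float32x2_t rsqrt_newton(float32x2_t d) {
  float32x2_t x = vrsqrte_f32(d);                      // coarse estimate of 1/sqrt(d)
  x = vmul_f32(x, vrsqrts_f32(vmul_f32(d, x), x));     // x *= (3 - d*x*x)/2
  return vmul_f32(x, vrsqrts_f32(vmul_f32(d, x), x));  // second refinement step
}
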
// CHECK-LABEL: @test_vcage_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
  return vcage_f32(v1, v2);
}

// CHECK-LABEL: @test_vcage_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x i64> [[VCAGE_V2_I]]
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
  return vcage_f64(a, b);
}

// CHECK-LABEL: @test_vcageq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
  return vcageq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcageq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK:   ret <2 x i64> [[VCAGEQ_V2_I]]
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
  return vcageq_f64(v1, v2);
}

// CHECK-LABEL: @test_vcagt_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK:   ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}

// CHECK-LABEL: @test_vcagt_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x i64> [[VCAGT_V2_I]]
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}

// CHECK-LABEL: @test_vcagtq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK:   ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcagtq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK:   ret <2 x i64> [[VCAGTQ_V2_I]]
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}

// CHECK-LABEL: @test_vcale_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
// CHECK:   ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: @test_vcale_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
// CHECK:   ret <1 x i64> [[VCALE_V2_I]]
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}

// CHECK-LABEL: @test_vcaleq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
// CHECK:   ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: @test_vcaleq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
// CHECK:   ret <2 x i64> [[VCALEQ_V2_I]]
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: @test_vcalt_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
// CHECK:   ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: @test_vcalt_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
// CHECK:   ret <1 x i64> [[VCALT_V2_I]]
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}

// CHECK-LABEL: @test_vcaltq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
// CHECK:   ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: @test_vcaltq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
// CHECK:   ret <2 x i64> [[VCALTQ_V2_I]]
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

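// As the facge/facgt calls above show, vcale/vcalt have no intrinsic of
// their own at the IR level: the operands are simply swapped, since
// |a| <= |b| holds exactly when |b| >= |a|. A hedged sketch of that identity
// (illustrative only; unused static inline, so no IR is emitted):
static inline uint32x2_t cale_via_cage(float32x2_t a, float32x2_t b) {
  return vcage_f32(b, a);   // same lanes set as vcale_f32(a, b)
}
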
// CHECK-LABEL: @test_vtst_s8(
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}

// CHECK-LABEL: @test_vtst_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}

// CHECK-LABEL: @test_vtst_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}

// CHECK-LABEL: @test_vtst_u8(
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}

// CHECK-LABEL: @test_vtst_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}

// CHECK-LABEL: @test_vtst_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s8(
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u8(
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}

// CHECK-LABEL: @test_vtstq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}

// CHECK-LABEL: @test_vtstq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vtstq_u64(v1, v2);
}

// CHECK-LABEL: @test_vtst_p8(
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
  return vtst_p8(v1, v2);
}

// CHECK-LABEL: @test_vtst_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
  return vtst_p16(v1, v2);
}

// CHECK-LABEL: @test_vtstq_p8(
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vtstq_p8(v1, v2);
}

// CHECK-LABEL: @test_vtstq_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
  return vtstq_p16(v1, v2);
}

// CHECK-LABEL: @test_vtst_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <1 x i64> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
// CHECK:   ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}

// CHECK-LABEL: @test_vtst_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <1 x i64> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
// CHECK:   ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}

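// vtst* sets a lane to all ones when the two operands share at least one set
// bit, i.e. when (a & b) is non-zero, exactly the and/icmp-ne/sext sequence
// checked above. A scalar sketch of the per-lane rule (illustrative only;
// unused static inline, so no IR is emitted):
static inline uint8_t tst_scalar(uint8_t a, uint8_t b) {
  return (a & b) != 0 ? 0xFF : 0x00;
}
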
// CHECK-LABEL: @test_vceq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}

// CHECK-LABEL: @test_vceq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}

// CHECK-LABEL: @test_vceq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}

// CHECK-LABEL: @test_vceq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}

// CHECK-LABEL: @test_vceq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}

// CHECK-LABEL: @test_vceq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}

// CHECK-LABEL: @test_vceq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}

// CHECK-LABEL: @test_vceq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}

// CHECK-LABEL: @test_vceq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}

// CHECK-LABEL: @test_vceq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}

// CHECK-LABEL: @test_vceq_p8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}

// CHECK-LABEL: @test_vceqq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vceqq_u16(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vceqq_u32(v1, v2);
}

// CHECK-LABEL: @test_vceqq_p8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vceqq_p8(v1, v2);
}

// CHECK-LABEL: @test_vceqq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
  return vceqq_s64(v1, v2);
}

// CHECK-LABEL: @test_vceqq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vceqq_u64(v1, v2);
}

// CHECK-LABEL: @test_vceqq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
  return vceqq_f64(v1, v2);
}

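// The all-ones/all-zeros lane masks these comparisons return (the sext of
// the i1 compare above) compose directly with the vbsl* family. A hedged
// usage sketch (illustrative only; the helper is ours and emits no IR):
static inline int8x8_t select_eq(int8x8_t a, int8x8_t b, int8x8_t x, int8x8_t y) {
  return vbsl_s8(vceq_s8(a, b), x, y);   // lanes where a == b take x, else y
}
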
// CHECK-LABEL: @test_vcge_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v1, v2);
}

// CHECK-LABEL: @test_vcge_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
  return vcge_s16(v1, v2);
}

// CHECK-LABEL: @test_vcge_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
  return vcge_s32(v1, v2);
}

// CHECK-LABEL: @test_vcge_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
  return vcge_s64(a, b);
}

// CHECK-LABEL: @test_vcge_u64(
// CHECK:   [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
  return vcge_u64(a, b);
}

// CHECK-LABEL: @test_vcge_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
  return vcge_f32(v1, v2);
}

// CHECK-LABEL: @test_vcge_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
  return vcge_f64(a, b);
}

// CHECK-LABEL: @test_vcge_u8(
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcge_u8(v1, v2);
}

// CHECK-LABEL: @test_vcge_u16(
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcge_u16(v1, v2);
}

// CHECK-LABEL: @test_vcge_u32(
// CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcge_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgeq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgeq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgeq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgeq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgeq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgeq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgeq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgeq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgeq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcgeq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgeq_f64(v1, v2);
}

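// A greater-or-equal mask plus a bitwise select reproduces a lane-wise max,
// one common consumer of these masks. Hedged sketch (illustrative only;
// vmax_s32 is the intrinsic normally used for this, and the helper emits
// no IR):
static inline int32x2_t max_via_cge(int32x2_t a, int32x2_t b) {
  return vbsl_s32(vcge_s32(a, b), a, b);   // equivalent to vmax_s32(a, b)
}
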
// CHECK-LABEL: @test_vcle_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
// Notes about vcle:
// The LE condition predicate is implemented as GE, so the generated code swaps the operands.
// Using registers other than v0, v1 is possible, but would be odd.
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: @test_vcle_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: @test_vcle_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: @test_vcle_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}

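// Because each true lane is -1 (all ones), shifting the mask down to a single
// bit and doing a horizontal add counts the lanes that satisfy the predicate.
// Hedged sketch using the AArch64-only vaddv reduction, which matches this
// test's target (illustrative only; the helper emits no IR):
static inline int count_le(int16x4_t a, int16x4_t b) {
  uint16x4_t m = vcle_s16(a, b);               // 0xFFFF per true lane
  return (int)vaddv_u16(vshr_n_u16(m, 15));    // 1 per true lane, then sum
}
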
2184 // CHECK-LABEL: @test_vcgt_s8(
2185 // CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
2186 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2187 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vcgt_s8(int8x8_t v1,int8x8_t v2)2188 uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
2189   return vcgt_s8(v1, v2);
2190 }
2191 
2192 // CHECK-LABEL: @test_vcgt_s16(
2193 // CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
2194 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2195 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vcgt_s16(int16x4_t v1,int16x4_t v2)2196 uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
2197   return vcgt_s16(v1, v2);
2198 }
2199 
2200 // CHECK-LABEL: @test_vcgt_s32(
2201 // CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
2202 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2203 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcgt_s32(int32x2_t v1,int32x2_t v2)2204 uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
2205   return vcgt_s32(v1, v2);
2206 }
2207 
2208 // CHECK-LABEL: @test_vcgt_s64(
2209 // CHECK:   [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
2210 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2211 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcgt_s64(int64x1_t a,int64x1_t b)2212 uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
2213   return vcgt_s64(a, b);
2214 }
2215 
2216 // CHECK-LABEL: @test_vcgt_u64(
2217 // CHECK:   [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
2218 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2219 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcgt_u64(uint64x1_t a,uint64x1_t b)2220 uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
2221   return vcgt_u64(a, b);
2222 }
2223 
2224 // CHECK-LABEL: @test_vcgt_f32(
2225 // CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
2226 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2227 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcgt_f32(float32x2_t v1,float32x2_t v2)2228 uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
2229   return vcgt_f32(v1, v2);
2230 }
2231 
2232 // CHECK-LABEL: @test_vcgt_f64(
2233 // CHECK:   [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
2234 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2235 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcgt_f64(float64x1_t a,float64x1_t b)2236 uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
2237   return vcgt_f64(a, b);
2238 }
2239 
2240 // CHECK-LABEL: @test_vcgt_u8(
2241 // CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
2242 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2243 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vcgt_u8(uint8x8_t v1,uint8x8_t v2)2244 uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
2245   return vcgt_u8(v1, v2);
2246 }
2247 
2248 // CHECK-LABEL: @test_vcgt_u16(
2249 // CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
2250 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2251 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vcgt_u16(uint16x4_t v1,uint16x4_t v2)2252 uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
2253   return vcgt_u16(v1, v2);
2254 }
2255 
2256 // CHECK-LABEL: @test_vcgt_u32(
2257 // CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
2258 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2259 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcgt_u32(uint32x2_t v1,uint32x2_t v2)2260 uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
2261   return vcgt_u32(v1, v2);
2262 }
2263 
// CHECK-LABEL: @test_vcgtq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}

// CHECK-LABEL: @test_vclt_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// The LT condition predicate is implemented as GT, so check for reversed operands.
// Using registers other than v0 and v1 is possible, but would be odd.
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: @test_vclt_s64(
// CHECK:   [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: @test_vclt_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: @test_vclt_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}

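// vhadd: halving add. Each lane holds (v1 + v2) >> 1 with the sum computed in
// double width, so the carry is not lost; lowered to the
// llvm.aarch64.neon.{s,u}hadd intrinsics.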
// CHECK-LABEL: @test_vhadd_s8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}

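// vhsub: halving subtract. Each lane holds (v1 - v2) >> 1; lowered to
// llvm.aarch64.neon.{s,u}hsub.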
// CHECK-LABEL: @test_vhsub_s8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}

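// vrhadd: rounding halving add. Like vhadd but rounds up, i.e.
// (v1 + v2 + 1) >> 1; lowered to llvm.aarch64.neon.{s,u}rhadd.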
// CHECK-LABEL: @test_vrhadd_s8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vrhadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}

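// vqadd: saturating add. Results that would overflow clamp to the lane type's
// minimum or maximum instead of wrapping; lowered to
// llvm.aarch64.neon.{s,u}qadd.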
// CHECK-LABEL: @test_vqadd_s8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddq_s8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

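// vqsub: saturating subtract; lowered to llvm.aarch64.neon.{s,u}qsub.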
// CHECK-LABEL: @test_vqsub_s8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: @test_vqsubq_s8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

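// vshl: shift left by a per-lane amount taken from the second operand, which
// is always a signed vector (negative elements shift right instead); lowered
// to llvm.aarch64.neon.{s,u}shl.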
// CHECK-LABEL: @test_vshl_s8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: @test_vshlq_s8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}

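// vqshl: saturating shift left by register; results that would overflow clamp
// to the lane type's limits. Lowered to llvm.aarch64.neon.{s,u}qshl.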
3275 // CHECK-LABEL: @test_vqshl_s8(
3276 // CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3277 // CHECK:   ret <8 x i8> [[VQSHL_V_I]]
test_vqshl_s8(int8x8_t a,int8x8_t b)3278 int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
3279   return vqshl_s8(a, b);
3280 }
3281 
3282 // CHECK-LABEL: @test_vqshl_s16(
3283 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3284 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3285 // CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3286 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3287 // CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
test_vqshl_s16(int16x4_t a,int16x4_t b)3288 int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
3289   return vqshl_s16(a, b);
3290 }
3291 
3292 // CHECK-LABEL: @test_vqshl_s32(
3293 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3294 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3295 // CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3296 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3297 // CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
test_vqshl_s32(int32x2_t a,int32x2_t b)3298 int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
3299   return vqshl_s32(a, b);
3300 }
3301 
3302 // CHECK-LABEL: @test_vqshl_s64(
3303 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3304 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3305 // CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3306 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3307 // CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
test_vqshl_s64(int64x1_t a,int64x1_t b)3308 int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
3309   return vqshl_s64(a, b);
3310 }
3311 
3312 // CHECK-LABEL: @test_vqshl_u8(
3313 // CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3314 // CHECK:   ret <8 x i8> [[VQSHL_V_I]]
test_vqshl_u8(uint8x8_t a,int8x8_t b)3315 uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
3316   return vqshl_u8(a, b);
3317 }
3318 
3319 // CHECK-LABEL: @test_vqshl_u16(
3320 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3321 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3322 // CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3323 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3324 // CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
test_vqshl_u16(uint16x4_t a,int16x4_t b)3325 uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
3326   return vqshl_u16(a, b);
3327 }
3328 
3329 // CHECK-LABEL: @test_vqshl_u32(
3330 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3331 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3332 // CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3333 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3334 // CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
test_vqshl_u32(uint32x2_t a,int32x2_t b)3335 uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
3336   return vqshl_u32(a, b);
3337 }
3338 
3339 // CHECK-LABEL: @test_vqshl_u64(
3340 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3341 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3342 // CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3343 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3344 // CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
test_vqshl_u64(uint64x1_t a,int64x1_t b)3345 uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
3346   return vqshl_u64(a, b);
3347 }
3348 
3349 // CHECK-LABEL: @test_vqshlq_s8(
3350 // CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3351 // CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

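// vrshl/vrshlq: rounding shift left. Each lane of a is shifted by the
// corresponding signed lane of b; a negative count shifts right with
// rounding, e.g. a lane value of 6 shifted by -2 gives (6 + (1 << 1)) >> 2
// = 2, where a plain truncating shift would give 6 >> 2 = 1.
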
// CHECK-LABEL: @test_vrshl_s8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: @test_vrshlq_s8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}

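// vqrshl/vqrshlq: saturating rounding shift left. Combines the rounding
// behaviour of vrshl with the saturation of vqshl: left shifts that
// overflow the element type clamp to the type's limits, and negative
// counts shift right with rounding.
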
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

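// vsli_n/vsliq_n: shift left and insert. Each lane of b is shifted left by
// the immediate and inserted into the corresponding lane of a, preserving
// the low n bits of a; with n == 0, as in these tests, the result is
// simply b.
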
// CHECK-LABEL: @test_vsli_n_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: @test_vsliq_n_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}

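// vmax/vmaxq: per-lane maximum. The floating-point forms map to FMAX,
// which propagates NaN operands (contrast vmaxnm below).
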
// CHECK-LABEL: @test_vmax_s8(
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_s8(
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}

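// vmin/vminq: per-lane minimum, the SMIN/UMIN/FMIN counterpart of vmax
// above; the floating-point forms likewise propagate NaNs.
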
// CHECK-LABEL: @test_vmin_s8(
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: @test_vmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: @test_vmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: @test_vmin_u8(
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: @test_vmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: @test_vminq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}

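// vmaxnm/vminnm: per-lane maximum/minimum with IEEE 754-2008 maxNum/minNum
// semantics (FMAXNM/FMINNM): if exactly one operand is a quiet NaN, the
// numeric operand is returned instead of the NaN.
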
// CHECK-LABEL: @test_vmaxnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}

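// vpmax/vpmaxq: pairwise maximum. The two inputs are treated as one
// concatenated vector and each adjacent pair of lanes is reduced, so the
// low half of the result comes from a and the high half from b.
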
// CHECK-LABEL: @test_vpmax_s8(
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s8(
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u8(
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}

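// vpmin/vpminq: pairwise minimum, with the same pairing scheme as vpmax
// above.
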
// CHECK-LABEL: @test_vpmin_s8(
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_s8(
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}

// CHECK-LABEL: @test_vpminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}

// CHECK-LABEL: @test_vpminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}

// CHECK-LABEL: @test_vpminq_u8(
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}

// CHECK-LABEL: @test_vpminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}

// CHECK-LABEL: @test_vpminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}

// CHECK-LABEL: @test_vpminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}

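// vpmaxnm/vpminnm: pairwise maximum/minimum using the NaN-suppressing
// maxNum/minNum semantics of vmaxnm/vminnm (FMAXNMP/FMINNMP).
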
// CHECK-LABEL: @test_vpmaxnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vpminnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}

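// vpadd/vpaddq: pairwise add. As with vpmax, adjacent lanes of the
// concatenation of a and b are summed, e.g. for vpadd_s16 the result is
// { a0+a1, a2+a3, b0+b1, b2+b3 }.
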
4357 // CHECK-LABEL: @test_vpadd_s8(
4358 // CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
4359 // CHECK:   ret <8 x i8> [[VPADD_V_I]]
test_vpadd_s8(int8x8_t a,int8x8_t b)4360 int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
4361   return vpadd_s8(a, b);
4362 }
4363 
4364 // CHECK-LABEL: @test_vpadd_s16(
4365 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4366 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4367 // CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
4368 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4369 // CHECK:   ret <4 x i16> [[VPADD_V2_I]]
test_vpadd_s16(int16x4_t a,int16x4_t b)4370 int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
4371   return vpadd_s16(a, b);
4372 }
4373 
4374 // CHECK-LABEL: @test_vpadd_s32(
4375 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4376 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4377 // CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
4378 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4379 // CHECK:   ret <2 x i32> [[VPADD_V2_I]]
test_vpadd_s32(int32x2_t a,int32x2_t b)4380 int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
4381   return vpadd_s32(a, b);
4382 }
4383 
4384 // CHECK-LABEL: @test_vpadd_u8(
4385 // CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
4386 // CHECK:   ret <8 x i8> [[VPADD_V_I]]
test_vpadd_u8(uint8x8_t a,uint8x8_t b)4387 uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
4388   return vpadd_u8(a, b);
4389 }
4390 
4391 // CHECK-LABEL: @test_vpadd_u16(
4392 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4393 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4394 // CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
4395 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4396 // CHECK:   ret <4 x i16> [[VPADD_V2_I]]
test_vpadd_u16(uint16x4_t a,uint16x4_t b)4397 uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
4398   return vpadd_u16(a, b);
4399 }
4400 
4401 // CHECK-LABEL: @test_vpadd_u32(
4402 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4403 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4404 // CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
4405 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4406 // CHECK:   ret <2 x i32> [[VPADD_V2_I]]
test_vpadd_u32(uint32x2_t a,uint32x2_t b)4407 uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
4408   return vpadd_u32(a, b);
4409 }
4410 
4411 // CHECK-LABEL: @test_vpadd_f32(
4412 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4413 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4414 // CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
4415 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
4416 // CHECK:   ret <2 x float> [[VPADD_V2_I]]
test_vpadd_f32(float32x2_t a,float32x2_t b)4417 float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
4418   return vpadd_f32(a, b);
4419 }
4420 
4421 // CHECK-LABEL: @test_vpaddq_s8(
4422 // CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
4423 // CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
test_vpaddq_s8(int8x16_t a,int8x16_t b)4424 int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
4425   return vpaddq_s8(a, b);
4426 }
4427 
4428 // CHECK-LABEL: @test_vpaddq_s16(
4429 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4430 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4431 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
4432 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
4433 // CHECK:   ret <8 x i16> [[VPADDQ_V2_I]]
test_vpaddq_s16(int16x8_t a,int16x8_t b)4434 int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
4435   return vpaddq_s16(a, b);
4436 }
4437 
4438 // CHECK-LABEL: @test_vpaddq_s32(
4439 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4440 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4441 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
4442 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
4443 // CHECK:   ret <4 x i32> [[VPADDQ_V2_I]]
test_vpaddq_s32(int32x4_t a,int32x4_t b)4444 int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
4445   return vpaddq_s32(a, b);
4446 }
4447 
4448 // CHECK-LABEL: @test_vpaddq_u8(
4449 // CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
4450 // CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
test_vpaddq_u8(uint8x16_t a,uint8x16_t b)4451 uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
4452   return vpaddq_u8(a, b);
4453 }
4454 
4455 // CHECK-LABEL: @test_vpaddq_u16(
4456 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4457 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4458 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
4459 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
4460 // CHECK:   ret <8 x i16> [[VPADDQ_V2_I]]
test_vpaddq_u16(uint16x8_t a,uint16x8_t b)4461 uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
4462   return vpaddq_u16(a, b);
4463 }
4464 
4465 // CHECK-LABEL: @test_vpaddq_u32(
4466 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4467 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4468 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
4469 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
4470 // CHECK:   ret <4 x i32> [[VPADDQ_V2_I]]
test_vpaddq_u32(uint32x4_t a,uint32x4_t b)4471 uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
4472   return vpaddq_u32(a, b);
4473 }
4474 
4475 // CHECK-LABEL: @test_vpaddq_f32(
4476 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4477 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4478 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
4479 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
4480 // CHECK:   ret <4 x float> [[VPADDQ_V2_I]]
test_vpaddq_f32(float32x4_t a,float32x4_t b)4481 float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
4482   return vpaddq_f32(a, b);
4483 }
4484 
4485 // CHECK-LABEL: @test_vpaddq_f64(
4486 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4487 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4488 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
4489 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
4490 // CHECK:   ret <2 x double> [[VPADDQ_V2_I]]
4491 float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
4492   return vpaddq_f64(a, b);
4493 }
4494 
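// Saturating doubling multiply high half: vqdmulh maps directly to the
// @llvm.aarch64.neon.sqdmulh intrinsic at the matching vector width.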
4495 // CHECK-LABEL: @test_vqdmulh_s16(
4496 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4497 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4498 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
4499 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
4500 // CHECK:   ret <4 x i16> [[VQDMULH_V2_I]]
4501 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
4502   return vqdmulh_s16(a, b);
4503 }
4504 
4505 // CHECK-LABEL: @test_vqdmulh_s32(
4506 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4507 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4508 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
4509 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
4510 // CHECK:   ret <2 x i32> [[VQDMULH_V2_I]]
4511 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
4512   return vqdmulh_s32(a, b);
4513 }
4514 
4515 // CHECK-LABEL: @test_vqdmulhq_s16(
4516 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4517 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4518 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
4519 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
4520 // CHECK:   ret <8 x i16> [[VQDMULHQ_V2_I]]
4521 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
4522   return vqdmulhq_s16(a, b);
4523 }
4524 
4525 // CHECK-LABEL: @test_vqdmulhq_s32(
4526 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4527 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4528 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
4529 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
4530 // CHECK:   ret <4 x i32> [[VQDMULHQ_V2_I]]
4531 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
4532   return vqdmulhq_s32(a, b);
4533 }
4534 
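// Rounding variant of the above: vqrdmulh maps to @llvm.aarch64.neon.sqrdmulh.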
4535 // CHECK-LABEL: @test_vqrdmulh_s16(
4536 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4537 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4538 // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
4539 // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
4540 // CHECK:   ret <4 x i16> [[VQRDMULH_V2_I]]
4541 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
4542   return vqrdmulh_s16(a, b);
4543 }
4544 
4545 // CHECK-LABEL: @test_vqrdmulh_s32(
4546 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4547 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4548 // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
4549 // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
4550 // CHECK:   ret <2 x i32> [[VQRDMULH_V2_I]]
4551 int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
4552   return vqrdmulh_s32(a, b);
4553 }
4554 
4555 // CHECK-LABEL: @test_vqrdmulhq_s16(
4556 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4557 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4558 // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
4559 // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
4560 // CHECK:   ret <8 x i16> [[VQRDMULHQ_V2_I]]
4561 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
4562   return vqrdmulhq_s16(a, b);
4563 }
4564 
4565 // CHECK-LABEL: @test_vqrdmulhq_s32(
4566 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4567 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4568 // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
4569 // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
4570 // CHECK:   ret <4 x i32> [[VQRDMULHQ_V2_I]]
4571 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
4572   return vqrdmulhq_s32(a, b);
4573 }
4574 
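// Extended floating-point multiply: vmulx maps to @llvm.aarch64.neon.fmulx.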
4575 // CHECK-LABEL: @test_vmulx_f32(
4576 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4577 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4578 // CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
4579 // CHECK:   ret <2 x float> [[VMULX2_I]]
4580 float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
4581   return vmulx_f32(a, b);
4582 }
4583 
4584 // CHECK-LABEL: @test_vmulxq_f32(
4585 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4586 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4587 // CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
4588 // CHECK:   ret <4 x float> [[VMULX2_I]]
4589 float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
4590   return vmulxq_f32(a, b);
4591 }
4592 
4593 // CHECK-LABEL: @test_vmulxq_f64(
4594 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4595 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4596 // CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
4597 // CHECK:   ret <2 x double> [[VMULX2_I]]
4598 float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
4599   return vmulxq_f64(a, b);
4600 }
4601 
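// Immediate shift left: vshl_n needs no target intrinsic and is emitted as a
// plain IR shl by a splat of the immediate.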
4602 // CHECK-LABEL: @test_vshl_n_s8(
4603 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4604 // CHECK:   ret <8 x i8> [[VSHL_N]]
4605 int8x8_t test_vshl_n_s8(int8x8_t a) {
4606   return vshl_n_s8(a, 3);
4607 }
4608 
4609 // CHECK-LABEL: @test_vshl_n_s16(
4610 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4611 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4612 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4613 // CHECK:   ret <4 x i16> [[VSHL_N]]
4614 int16x4_t test_vshl_n_s16(int16x4_t a) {
4615   return vshl_n_s16(a, 3);
4616 }
4617 
4618 // CHECK-LABEL: @test_vshl_n_s32(
4619 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4620 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4621 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4622 // CHECK:   ret <2 x i32> [[VSHL_N]]
4623 int32x2_t test_vshl_n_s32(int32x2_t a) {
4624   return vshl_n_s32(a, 3);
4625 }
4626 
4627 // CHECK-LABEL: @test_vshlq_n_s8(
4628 // CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4629 // CHECK:   ret <16 x i8> [[VSHL_N]]
4630 int8x16_t test_vshlq_n_s8(int8x16_t a) {
4631   return vshlq_n_s8(a, 3);
4632 }
4633 
4634 // CHECK-LABEL: @test_vshlq_n_s16(
4635 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4636 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4637 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4638 // CHECK:   ret <8 x i16> [[VSHL_N]]
4639 int16x8_t test_vshlq_n_s16(int16x8_t a) {
4640   return vshlq_n_s16(a, 3);
4641 }
4642 
4643 // CHECK-LABEL: @test_vshlq_n_s32(
4644 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4645 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4646 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4647 // CHECK:   ret <4 x i32> [[VSHL_N]]
4648 int32x4_t test_vshlq_n_s32(int32x4_t a) {
4649   return vshlq_n_s32(a, 3);
4650 }
4651 
4652 // CHECK-LABEL: @test_vshlq_n_s64(
4653 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4654 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4655 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4656 // CHECK:   ret <2 x i64> [[VSHL_N]]
4657 int64x2_t test_vshlq_n_s64(int64x2_t a) {
4658   return vshlq_n_s64(a, 3);
4659 }
4660 
4661 // CHECK-LABEL: @test_vshl_n_u8(
4662 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4663 // CHECK:   ret <8 x i8> [[VSHL_N]]
4664 uint8x8_t test_vshl_n_u8(uint8x8_t a) {
4665   return vshl_n_u8(a, 3);
4666 }
4667 
4668 // CHECK-LABEL: @test_vshl_n_u16(
4669 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4670 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4671 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4672 // CHECK:   ret <4 x i16> [[VSHL_N]]
4673 uint16x4_t test_vshl_n_u16(uint16x4_t a) {
4674   return vshl_n_u16(a, 3);
4675 }
4676 
4677 // CHECK-LABEL: @test_vshl_n_u32(
4678 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4679 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4680 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4681 // CHECK:   ret <2 x i32> [[VSHL_N]]
4682 uint32x2_t test_vshl_n_u32(uint32x2_t a) {
4683   return vshl_n_u32(a, 3);
4684 }
4685 
4686 // CHECK-LABEL: @test_vshlq_n_u8(
4687 // CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4688 // CHECK:   ret <16 x i8> [[VSHL_N]]
4689 uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
4690   return vshlq_n_u8(a, 3);
4691 }
4692 
4693 // CHECK-LABEL: @test_vshlq_n_u16(
4694 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4695 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4696 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4697 // CHECK:   ret <8 x i16> [[VSHL_N]]
4698 uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
4699   return vshlq_n_u16(a, 3);
4700 }
4701 
4702 // CHECK-LABEL: @test_vshlq_n_u32(
4703 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4704 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4705 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4706 // CHECK:   ret <4 x i32> [[VSHL_N]]
4707 uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
4708   return vshlq_n_u32(a, 3);
4709 }
4710 
4711 // CHECK-LABEL: @test_vshlq_n_u64(
4712 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4713 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4714 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4715 // CHECK:   ret <2 x i64> [[VSHL_N]]
4716 uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
4717   return vshlq_n_u64(a, 3);
4718 }
4719 
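// Immediate shift right: the signed forms lower to ashr and the unsigned
// forms to lshr, again with the immediate splatted across the vector.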
4720 // CHECK-LABEL: @test_vshr_n_s8(
4721 // CHECK:   [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4722 // CHECK:   ret <8 x i8> [[VSHR_N]]
4723 int8x8_t test_vshr_n_s8(int8x8_t a) {
4724   return vshr_n_s8(a, 3);
4725 }
4726 
4727 // CHECK-LABEL: @test_vshr_n_s16(
4728 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4729 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4730 // CHECK:   [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4731 // CHECK:   ret <4 x i16> [[VSHR_N]]
4732 int16x4_t test_vshr_n_s16(int16x4_t a) {
4733   return vshr_n_s16(a, 3);
4734 }
4735 
4736 // CHECK-LABEL: @test_vshr_n_s32(
4737 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4738 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4739 // CHECK:   [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
4740 // CHECK:   ret <2 x i32> [[VSHR_N]]
4741 int32x2_t test_vshr_n_s32(int32x2_t a) {
4742   return vshr_n_s32(a, 3);
4743 }
4744 
4745 // CHECK-LABEL: @test_vshrq_n_s8(
4746 // CHECK:   [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4747 // CHECK:   ret <16 x i8> [[VSHR_N]]
4748 int8x16_t test_vshrq_n_s8(int8x16_t a) {
4749   return vshrq_n_s8(a, 3);
4750 }
4751 
4752 // CHECK-LABEL: @test_vshrq_n_s16(
4753 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4754 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4755 // CHECK:   [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4756 // CHECK:   ret <8 x i16> [[VSHR_N]]
4757 int16x8_t test_vshrq_n_s16(int16x8_t a) {
4758   return vshrq_n_s16(a, 3);
4759 }
4760 
4761 // CHECK-LABEL: @test_vshrq_n_s32(
4762 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4763 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4764 // CHECK:   [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4765 // CHECK:   ret <4 x i32> [[VSHR_N]]
4766 int32x4_t test_vshrq_n_s32(int32x4_t a) {
4767   return vshrq_n_s32(a, 3);
4768 }
4769 
4770 // CHECK-LABEL: @test_vshrq_n_s64(
4771 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4772 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4773 // CHECK:   [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
4774 // CHECK:   ret <2 x i64> [[VSHR_N]]
4775 int64x2_t test_vshrq_n_s64(int64x2_t a) {
4776   return vshrq_n_s64(a, 3);
4777 }
4778 
4779 // CHECK-LABEL: @test_vshr_n_u8(
4780 // CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4781 // CHECK:   ret <8 x i8> [[VSHR_N]]
4782 uint8x8_t test_vshr_n_u8(uint8x8_t a) {
4783   return vshr_n_u8(a, 3);
4784 }
4785 
4786 // CHECK-LABEL: @test_vshr_n_u16(
4787 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4788 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4789 // CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4790 // CHECK:   ret <4 x i16> [[VSHR_N]]
4791 uint16x4_t test_vshr_n_u16(uint16x4_t a) {
4792   return vshr_n_u16(a, 3);
4793 }
4794 
4795 // CHECK-LABEL: @test_vshr_n_u32(
4796 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4797 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4798 // CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
4799 // CHECK:   ret <2 x i32> [[VSHR_N]]
4800 uint32x2_t test_vshr_n_u32(uint32x2_t a) {
4801   return vshr_n_u32(a, 3);
4802 }
4803 
4804 // CHECK-LABEL: @test_vshrq_n_u8(
4805 // CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4806 // CHECK:   ret <16 x i8> [[VSHR_N]]
4807 uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
4808   return vshrq_n_u8(a, 3);
4809 }
4810 
4811 // CHECK-LABEL: @test_vshrq_n_u16(
4812 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4813 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4814 // CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4815 // CHECK:   ret <8 x i16> [[VSHR_N]]
4816 uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
4817   return vshrq_n_u16(a, 3);
4818 }
4819 
4820 // CHECK-LABEL: @test_vshrq_n_u32(
4821 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4822 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4823 // CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4824 // CHECK:   ret <4 x i32> [[VSHR_N]]
4825 uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
4826   return vshrq_n_u32(a, 3);
4827 }
4828 
4829 // CHECK-LABEL: @test_vshrq_n_u64(
4830 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4831 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4832 // CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
4833 // CHECK:   ret <2 x i64> [[VSHR_N]]
4834 uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
4835   return vshrq_n_u64(a, 3);
4836 }
4837 
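// Shift right and accumulate: vsra_n is emitted as the corresponding shift
// followed by an add into the accumulator operand.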
4838 // CHECK-LABEL: @test_vsra_n_s8(
4839 // CHECK:   [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4840 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4841 // CHECK:   ret <8 x i8> [[TMP0]]
4842 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
4843   return vsra_n_s8(a, b, 3);
4844 }
4845 
4846 // CHECK-LABEL: @test_vsra_n_s16(
4847 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4848 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4849 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4850 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4851 // CHECK:   [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4852 // CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4853 // CHECK:   ret <4 x i16> [[TMP4]]
4854 int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
4855   return vsra_n_s16(a, b, 3);
4856 }
4857 
4858 // CHECK-LABEL: @test_vsra_n_s32(
4859 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4860 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4861 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4862 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4863 // CHECK:   [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
4864 // CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4865 // CHECK:   ret <2 x i32> [[TMP4]]
4866 int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
4867   return vsra_n_s32(a, b, 3);
4868 }
4869 
4870 // CHECK-LABEL: @test_vsraq_n_s8(
4871 // CHECK:   [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4872 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4873 // CHECK:   ret <16 x i8> [[TMP0]]
4874 int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
4875   return vsraq_n_s8(a, b, 3);
4876 }
4877 
4878 // CHECK-LABEL: @test_vsraq_n_s16(
4879 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4880 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4881 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4882 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4883 // CHECK:   [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4884 // CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4885 // CHECK:   ret <8 x i16> [[TMP4]]
4886 int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
4887   return vsraq_n_s16(a, b, 3);
4888 }
4889 
4890 // CHECK-LABEL: @test_vsraq_n_s32(
4891 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4892 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4893 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4894 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4895 // CHECK:   [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4896 // CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4897 // CHECK:   ret <4 x i32> [[TMP4]]
4898 int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
4899   return vsraq_n_s32(a, b, 3);
4900 }
4901 
4902 // CHECK-LABEL: @test_vsraq_n_s64(
4903 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4904 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4905 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4906 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4907 // CHECK:   [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
4908 // CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4909 // CHECK:   ret <2 x i64> [[TMP4]]
4910 int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
4911   return vsraq_n_s64(a, b, 3);
4912 }
4913 
4914 // CHECK-LABEL: @test_vsra_n_u8(
4915 // CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4916 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4917 // CHECK:   ret <8 x i8> [[TMP0]]
4918 uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
4919   return vsra_n_u8(a, b, 3);
4920 }
4921 
4922 // CHECK-LABEL: @test_vsra_n_u16(
4923 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4924 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4925 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4926 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4927 // CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4928 // CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4929 // CHECK:   ret <4 x i16> [[TMP4]]
4930 uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
4931   return vsra_n_u16(a, b, 3);
4932 }
4933 
4934 // CHECK-LABEL: @test_vsra_n_u32(
4935 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4936 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4937 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4938 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4939 // CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
4940 // CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4941 // CHECK:   ret <2 x i32> [[TMP4]]
4942 uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
4943   return vsra_n_u32(a, b, 3);
4944 }
4945 
4946 // CHECK-LABEL: @test_vsraq_n_u8(
4947 // CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4948 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4949 // CHECK:   ret <16 x i8> [[TMP0]]
4950 uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
4951   return vsraq_n_u8(a, b, 3);
4952 }
4953 
4954 // CHECK-LABEL: @test_vsraq_n_u16(
4955 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4956 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4957 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4958 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4959 // CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4960 // CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4961 // CHECK:   ret <8 x i16> [[TMP4]]
4962 uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
4963   return vsraq_n_u16(a, b, 3);
4964 }
4965 
4966 // CHECK-LABEL: @test_vsraq_n_u32(
4967 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4968 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4969 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4970 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4971 // CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4972 // CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4973 // CHECK:   ret <4 x i32> [[TMP4]]
4974 uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
4975   return vsraq_n_u32(a, b, 3);
4976 }
4977 
4978 // CHECK-LABEL: @test_vsraq_n_u64(
4979 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4980 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4981 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4982 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4983 // CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
4984 // CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4985 // CHECK:   ret <2 x i64> [[TMP4]]
4986 uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
4987   return vsraq_n_u64(a, b, 3);
4988 }
4989 
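// Rounding shift right: vrshr_n lowers to the srshl/urshl rounding shift
// intrinsics with a negated splat shift amount, i.e. the right shift is
// expressed as a rounding left shift by a negative count.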
4990 // CHECK-LABEL: @test_vrshr_n_s8(
4991 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
4992 // CHECK:   ret <8 x i8> [[VRSHR_N]]
4993 int8x8_t test_vrshr_n_s8(int8x8_t a) {
4994   return vrshr_n_s8(a, 3);
4995 }
4996 
4997 // CHECK-LABEL: @test_vrshr_n_s16(
4998 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4999 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5000 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5001 // CHECK:   ret <4 x i16> [[VRSHR_N1]]
5002 int16x4_t test_vrshr_n_s16(int16x4_t a) {
5003   return vrshr_n_s16(a, 3);
5004 }
5005 
5006 // CHECK-LABEL: @test_vrshr_n_s32(
5007 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5008 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5009 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5010 // CHECK:   ret <2 x i32> [[VRSHR_N1]]
5011 int32x2_t test_vrshr_n_s32(int32x2_t a) {
5012   return vrshr_n_s32(a, 3);
5013 }
5014 
5015 // CHECK-LABEL: @test_vrshrq_n_s8(
5016 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5017 // CHECK:   ret <16 x i8> [[VRSHR_N]]
5018 int8x16_t test_vrshrq_n_s8(int8x16_t a) {
5019   return vrshrq_n_s8(a, 3);
5020 }
5021 
5022 // CHECK-LABEL: @test_vrshrq_n_s16(
5023 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5024 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5025 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5026 // CHECK:   ret <8 x i16> [[VRSHR_N1]]
5027 int16x8_t test_vrshrq_n_s16(int16x8_t a) {
5028   return vrshrq_n_s16(a, 3);
5029 }
5030 
5031 // CHECK-LABEL: @test_vrshrq_n_s32(
5032 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5033 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5034 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5035 // CHECK:   ret <4 x i32> [[VRSHR_N1]]
5036 int32x4_t test_vrshrq_n_s32(int32x4_t a) {
5037   return vrshrq_n_s32(a, 3);
5038 }
5039 
5040 // CHECK-LABEL: @test_vrshrq_n_s64(
5041 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5042 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5043 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5044 // CHECK:   ret <2 x i64> [[VRSHR_N1]]
5045 int64x2_t test_vrshrq_n_s64(int64x2_t a) {
5046   return vrshrq_n_s64(a, 3);
5047 }
5048 
5049 // CHECK-LABEL: @test_vrshr_n_u8(
5050 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5051 // CHECK:   ret <8 x i8> [[VRSHR_N]]
5052 uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
5053   return vrshr_n_u8(a, 3);
5054 }
5055 
5056 // CHECK-LABEL: @test_vrshr_n_u16(
5057 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5058 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5059 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5060 // CHECK:   ret <4 x i16> [[VRSHR_N1]]
5061 uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
5062   return vrshr_n_u16(a, 3);
5063 }
5064 
5065 // CHECK-LABEL: @test_vrshr_n_u32(
5066 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5067 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5068 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5069 // CHECK:   ret <2 x i32> [[VRSHR_N1]]
5070 uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
5071   return vrshr_n_u32(a, 3);
5072 }
5073 
5074 // CHECK-LABEL: @test_vrshrq_n_u8(
5075 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5076 // CHECK:   ret <16 x i8> [[VRSHR_N]]
5077 uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
5078   return vrshrq_n_u8(a, 3);
5079 }
5080 
5081 // CHECK-LABEL: @test_vrshrq_n_u16(
5082 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5083 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5084 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5085 // CHECK:   ret <8 x i16> [[VRSHR_N1]]
5086 uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
5087   return vrshrq_n_u16(a, 3);
5088 }
5089 
5090 // CHECK-LABEL: @test_vrshrq_n_u32(
5091 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5092 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5093 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5094 // CHECK:   ret <4 x i32> [[VRSHR_N1]]
5095 uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
5096   return vrshrq_n_u32(a, 3);
5097 }
5098 
5099 // CHECK-LABEL: @test_vrshrq_n_u64(
5100 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5101 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5102 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5103 // CHECK:   ret <2 x i64> [[VRSHR_N1]]
5104 uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
5105   return vrshrq_n_u64(a, 3);
5106 }
5107 
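// Rounding shift right and accumulate: the srshl/urshl result is added to
// the accumulator operand %a.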
5108 // CHECK-LABEL: @test_vrsra_n_s8(
5109 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5110 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5111 // CHECK:   ret <8 x i8> [[TMP0]]
5112 int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
5113   return vrsra_n_s8(a, b, 3);
5114 }
5115 
5116 // CHECK-LABEL: @test_vrsra_n_s16(
5117 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5118 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5119 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5120 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5121 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5122 // CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5123 // CHECK:   ret <4 x i16> [[TMP3]]
5124 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
5125   return vrsra_n_s16(a, b, 3);
5126 }
5127 
5128 // CHECK-LABEL: @test_vrsra_n_s32(
5129 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5130 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5131 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5132 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5133 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5134 // CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5135 // CHECK:   ret <2 x i32> [[TMP3]]
5136 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
5137   return vrsra_n_s32(a, b, 3);
5138 }
5139 
5140 // CHECK-LABEL: @test_vrsraq_n_s8(
5141 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5142 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5143 // CHECK:   ret <16 x i8> [[TMP0]]
5144 int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
5145   return vrsraq_n_s8(a, b, 3);
5146 }
5147 
5148 // CHECK-LABEL: @test_vrsraq_n_s16(
5149 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5150 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5151 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5152 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5153 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5154 // CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5155 // CHECK:   ret <8 x i16> [[TMP3]]
5156 int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
5157   return vrsraq_n_s16(a, b, 3);
5158 }
5159 
5160 // CHECK-LABEL: @test_vrsraq_n_s32(
5161 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5162 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5163 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5165 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5166 // CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5167 // CHECK:   ret <4 x i32> [[TMP3]]
5168 int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
5169   return vrsraq_n_s32(a, b, 3);
5170 }
5171 
5172 // CHECK-LABEL: @test_vrsraq_n_s64(
5173 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5174 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5175 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5176 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5177 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5178 // CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5179 // CHECK:   ret <2 x i64> [[TMP3]]
5180 int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
5181   return vrsraq_n_s64(a, b, 3);
5182 }
5183 
5184 // CHECK-LABEL: @test_vrsra_n_u8(
5185 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5186 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5187 // CHECK:   ret <8 x i8> [[TMP0]]
5188 uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
5189   return vrsra_n_u8(a, b, 3);
5190 }
5191 
5192 // CHECK-LABEL: @test_vrsra_n_u16(
5193 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5194 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5195 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5196 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5197 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5198 // CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5199 // CHECK:   ret <4 x i16> [[TMP3]]
5200 uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
5201   return vrsra_n_u16(a, b, 3);
5202 }
5203 
5204 // CHECK-LABEL: @test_vrsra_n_u32(
5205 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5206 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5207 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5208 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5209 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5210 // CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5211 // CHECK:   ret <2 x i32> [[TMP3]]
5212 uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
5213   return vrsra_n_u32(a, b, 3);
5214 }
5215 
5216 // CHECK-LABEL: @test_vrsraq_n_u8(
5217 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5218 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5219 // CHECK:   ret <16 x i8> [[TMP0]]
5220 uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
5221   return vrsraq_n_u8(a, b, 3);
5222 }
5223 
5224 // CHECK-LABEL: @test_vrsraq_n_u16(
5225 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5226 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5227 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5228 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5229 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5230 // CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5231 // CHECK:   ret <8 x i16> [[TMP3]]
5232 uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
5233   return vrsraq_n_u16(a, b, 3);
5234 }
5235 
5236 // CHECK-LABEL: @test_vrsraq_n_u32(
5237 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5238 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5239 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5240 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5241 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5242 // CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5243 // CHECK:   ret <4 x i32> [[TMP3]]
5244 uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
5245   return vrsraq_n_u32(a, b, 3);
5246 }
5247 
5248 // CHECK-LABEL: @test_vrsraq_n_u64(
5249 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5250 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5251 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5252 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5253 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5254 // CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5255 // CHECK:   ret <2 x i64> [[TMP3]]
5256 uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
5257   return vrsraq_n_u64(a, b, 3);
5258 }
5259 
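// Shift right and insert: vsri_n lowers to @llvm.aarch64.neon.vsri with the
// shift immediate passed as a scalar i32 third operand.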
5260 // CHECK-LABEL: @test_vsri_n_s8(
5261 // CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5262 // CHECK:   ret <8 x i8> [[VSRI_N]]
5263 int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
5264   return vsri_n_s8(a, b, 3);
5265 }
5266 
5267 // CHECK-LABEL: @test_vsri_n_s16(
5268 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5269 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5270 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5271 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5272 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5273 // CHECK:   ret <4 x i16> [[VSRI_N2]]
5274 int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
5275   return vsri_n_s16(a, b, 3);
5276 }
5277 
5278 // CHECK-LABEL: @test_vsri_n_s32(
5279 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5280 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5281 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5282 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5283 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5284 // CHECK:   ret <2 x i32> [[VSRI_N2]]
5285 int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
5286   return vsri_n_s32(a, b, 3);
5287 }
5288 
5289 // CHECK-LABEL: @test_vsriq_n_s8(
5290 // CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5291 // CHECK:   ret <16 x i8> [[VSRI_N]]
5292 int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
5293   return vsriq_n_s8(a, b, 3);
5294 }
5295 
5296 // CHECK-LABEL: @test_vsriq_n_s16(
5297 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5298 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5299 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5300 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5301 // CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5302 // CHECK:   ret <8 x i16> [[VSRI_N2]]
5303 int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
5304   return vsriq_n_s16(a, b, 3);
5305 }
5306 
5307 // CHECK-LABEL: @test_vsriq_n_s32(
5308 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5309 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5310 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5311 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5312 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5313 // CHECK:   ret <4 x i32> [[VSRI_N2]]
5314 int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
5315   return vsriq_n_s32(a, b, 3);
5316 }
5317 
5318 // CHECK-LABEL: @test_vsriq_n_s64(
5319 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5320 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5321 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5322 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5323 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5324 // CHECK:   ret <2 x i64> [[VSRI_N2]]
5325 int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
5326   return vsriq_n_s64(a, b, 3);
5327 }
5328 
5329 // CHECK-LABEL: @test_vsri_n_u8(
5330 // CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5331 // CHECK:   ret <8 x i8> [[VSRI_N]]
5332 uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
5333   return vsri_n_u8(a, b, 3);
5334 }
5335 
5336 // CHECK-LABEL: @test_vsri_n_u16(
5337 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5338 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5339 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5340 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5341 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5342 // CHECK:   ret <4 x i16> [[VSRI_N2]]
5343 uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
5344   return vsri_n_u16(a, b, 3);
5345 }
5346 
5347 // CHECK-LABEL: @test_vsri_n_u32(
5348 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5349 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5350 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5351 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5352 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5353 // CHECK:   ret <2 x i32> [[VSRI_N2]]
5354 uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
5355   return vsri_n_u32(a, b, 3);
5356 }
5357 
5358 // CHECK-LABEL: @test_vsriq_n_u8(
5359 // CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5360 // CHECK:   ret <16 x i8> [[VSRI_N]]
5361 uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
5362   return vsriq_n_u8(a, b, 3);
5363 }
5364 
5365 // CHECK-LABEL: @test_vsriq_n_u16(
5366 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5367 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5368 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5369 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5370 // CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5371 // CHECK:   ret <8 x i16> [[VSRI_N2]]
5372 uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
5373   return vsriq_n_u16(a, b, 3);
5374 }
5375 
5376 // CHECK-LABEL: @test_vsriq_n_u32(
5377 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5378 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5379 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5380 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5381 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5382 // CHECK:   ret <4 x i32> [[VSRI_N2]]
5383 uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
5384   return vsriq_n_u32(a, b, 3);
5385 }
5386 
5387 // CHECK-LABEL: @test_vsriq_n_u64(
5388 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5389 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5390 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5391 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5392 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5393 // CHECK:   ret <2 x i64> [[VSRI_N2]]
5394 uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
5395   return vsriq_n_u64(a, b, 3);
5396 }
5397 
5398 // CHECK-LABEL: @test_vsri_n_p8(
5399 // CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5400 // CHECK:   ret <8 x i8> [[VSRI_N]]
5401 poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
5402   return vsri_n_p8(a, b, 3);
5403 }
5404 
5405 // CHECK-LABEL: @test_vsri_n_p16(
5406 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5407 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5408 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5409 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5410 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
5411 // CHECK:   ret <4 x i16> [[VSRI_N2]]
5412 poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
5413   return vsri_n_p16(a, b, 15);
5414 }
5415 
5416 // CHECK-LABEL: @test_vsriq_n_p8(
5417 // CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5418 // CHECK:   ret <16 x i8> [[VSRI_N]]
5419 poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
5420   return vsriq_n_p8(a, b, 3);
5421 }
5422 
5423 // CHECK-LABEL: @test_vsriq_n_p16(
5424 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5425 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5426 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5427 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5428 // CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
5429 // CHECK:   ret <8 x i16> [[VSRI_N2]]
5430 poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
5431   return vsriq_n_p16(a, b, 15);
5432 }
5433 
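// Shift left and insert: vsli_n likewise lowers to @llvm.aarch64.neon.vsli.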
5434 // CHECK-LABEL: @test_vsli_n_s8(
5435 // CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5436 // CHECK:   ret <8 x i8> [[VSLI_N]]
5437 int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
5438   return vsli_n_s8(a, b, 3);
5439 }
5440 
5441 // CHECK-LABEL: @test_vsli_n_s16(
5442 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5444 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5445 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5446 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5447 // CHECK:   ret <4 x i16> [[VSLI_N2]]
5448 int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
5449   return vsli_n_s16(a, b, 3);
5450 }
5451 
5452 // CHECK-LABEL: @test_vsli_n_s32(
5453 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5454 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5455 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5456 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5457 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5458 // CHECK:   ret <2 x i32> [[VSLI_N2]]
5459 int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
5460   return vsli_n_s32(a, b, 3);
5461 }
5462 
5463 // CHECK-LABEL: @test_vsliq_n_s8(
5464 // CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5465 // CHECK:   ret <16 x i8> [[VSLI_N]]
5466 int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
5467   return vsliq_n_s8(a, b, 3);
5468 }
5469 
5470 // CHECK-LABEL: @test_vsliq_n_s16(
5471 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5472 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5473 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5474 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5475 // CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5476 // CHECK:   ret <8 x i16> [[VSLI_N2]]
5477 int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
5478   return vsliq_n_s16(a, b, 3);
5479 }
5480 
5481 // CHECK-LABEL: @test_vsliq_n_s32(
5482 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5483 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5484 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5485 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5486 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5487 // CHECK:   ret <4 x i32> [[VSLI_N2]]
5488 int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
5489   return vsliq_n_s32(a, b, 3);
5490 }
5491 
5492 // CHECK-LABEL: @test_vsliq_n_s64(
5493 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5494 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5495 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5496 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5497 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5498 // CHECK:   ret <2 x i64> [[VSLI_N2]]
5499 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
5500   return vsliq_n_s64(a, b, 3);
5501 }
5502 
5503 // CHECK-LABEL: @test_vsli_n_u8(
5504 // CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5505 // CHECK:   ret <8 x i8> [[VSLI_N]]
5506 uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
5507   return vsli_n_u8(a, b, 3);
5508 }
5509 
5510 // CHECK-LABEL: @test_vsli_n_u16(
5511 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5512 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5513 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5514 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5515 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5516 // CHECK:   ret <4 x i16> [[VSLI_N2]]
5517 uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
5518   return vsli_n_u16(a, b, 3);
5519 }
5520 
5521 // CHECK-LABEL: @test_vsli_n_u32(
5522 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5523 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5524 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5525 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5526 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5527 // CHECK:   ret <2 x i32> [[VSLI_N2]]
5528 uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
5529   return vsli_n_u32(a, b, 3);
5530 }
5531 
5532 // CHECK-LABEL: @test_vsliq_n_u8(
5533 // CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5534 // CHECK:   ret <16 x i8> [[VSLI_N]]
5535 uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
5536   return vsliq_n_u8(a, b, 3);
5537 }
5538 
5539 // CHECK-LABEL: @test_vsliq_n_u16(
5540 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5541 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5542 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5543 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5544 // CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5545 // CHECK:   ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_u16(uint16x8_t a,uint16x8_t b)5546 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
5547   return vsliq_n_u16(a, b, 3);
5548 }
5549 
5550 // CHECK-LABEL: @test_vsliq_n_u32(
5551 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5552 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5553 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5554 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5555 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5556 // CHECK:   ret <4 x i32> [[VSLI_N2]]
test_vsliq_n_u32(uint32x4_t a,uint32x4_t b)5557 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
5558   return vsliq_n_u32(a, b, 3);
5559 }
5560 
5561 // CHECK-LABEL: @test_vsliq_n_u64(
5562 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5563 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5564 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5565 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5566 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5567 // CHECK:   ret <2 x i64> [[VSLI_N2]]
test_vsliq_n_u64(uint64x2_t a,uint64x2_t b)5568 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
5569   return vsliq_n_u64(a, b, 3);
5570 }
5571 
5572 // CHECK-LABEL: @test_vsli_n_p8(
5573 // CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5574 // CHECK:   ret <8 x i8> [[VSLI_N]]
test_vsli_n_p8(poly8x8_t a,poly8x8_t b)5575 poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
5576   return vsli_n_p8(a, b, 3);
5577 }
5578 
5579 // CHECK-LABEL: @test_vsli_n_p16(
5580 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5581 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5582 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5583 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5584 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
5585 // CHECK:   ret <4 x i16> [[VSLI_N2]]
test_vsli_n_p16(poly16x4_t a,poly16x4_t b)5586 poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
5587   return vsli_n_p16(a, b, 15);
5588 }
5589 
5590 // CHECK-LABEL: @test_vsliq_n_p8(
5591 // CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5592 // CHECK:   ret <16 x i8> [[VSLI_N]]
test_vsliq_n_p8(poly8x16_t a,poly8x16_t b)5593 poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
5594   return vsliq_n_p8(a, b, 3);
5595 }
5596 
5597 // CHECK-LABEL: @test_vsliq_n_p16(
5598 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5599 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5600 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5601 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5602 // CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
5603 // CHECK:   ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_p16(poly16x8_t a,poly16x8_t b)5604 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
5605   return vsliq_n_p16(a, b, 15);
5606 }
5607 
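// vqshlu_n: signed saturating shift left unsigned (SQSHLU per the AArch64
// ACLE): each signed element is shifted left by the immediate and saturated
// to the unsigned range of the same width. Note the builtin passes the shift
// amount to the intrinsic as a splat vector rather than a scalar immediate.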
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <8 x i8> [[VQSHLU_N]]
int8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <4 x i16> [[VQSHLU_N1]]
int16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
// CHECK:   ret <2 x i32> [[VQSHLU_N1]]
int32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <16 x i8> [[VQSHLU_N]]
int8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <8 x i16> [[VQSHLU_N1]]
int16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
// CHECK:   ret <4 x i32> [[VQSHLU_N1]]
int32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
// CHECK:   ret <2 x i64> [[VQSHLU_N1]]
int64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}

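// vshrn_n: shift right narrow. No target intrinsic is needed here: codegen
// is a plain ashr (signed) or lshr (unsigned) on the wide vector followed by
// a trunc to the half-width element type.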
// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}

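// The _high narrowing variants write the narrowed result into the upper half
// of a 128-bit vector: the low half comes from %a, and the two halves are
// joined with a single shufflevector.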
// CHECK-LABEL: @test_vshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}

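// vqshrun_n (and its _high variant): signed saturating shift right unsigned
// narrow, lowered to @llvm.aarch64.neon.sqshrun with the shift as an i32
// immediate.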
// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
int8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
int16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
int32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrun_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrun_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrun_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}

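// vrshrn_n (and _high): rounding shift right narrow. Signed and unsigned
// inputs share @llvm.aarch64.neon.rshrn; for in-range immediates the bits
// kept after truncation do not depend on the shift's signedness.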
// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}

// CHECK-LABEL: @test_vrshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vrshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}

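// vqrshrun_n (and _high): signed saturating rounding shift right unsigned
// narrow, via @llvm.aarch64.neon.sqrshrun.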
// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}

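// vqshrn_n (and _high): saturating shift right narrow. Signed inputs select
// @llvm.aarch64.neon.sqshrn, unsigned inputs @llvm.aarch64.neon.uqshrn.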
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}

// CHECK-LABEL: @test_vqshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}

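// vqrshrn_n (and _high): saturating rounding shift right narrow, selecting
// @llvm.aarch64.neon.sqrshrn or @llvm.aarch64.neon.uqrshrn by signedness.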
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}

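// vshll_n: shift left long. For shift amounts below the source element width
// (the only case exercised here) no intrinsic is needed: the input is sext'd
// or zext'd to double width and shifted with a plain shl. A shift equal to
// the element width would instead match the dedicated SHLL instruction form.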
// CHECK-LABEL: @test_vshll_n_s8(
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}

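// vshll_high_n: the same lowering applied to the upper half of the source,
// which is extracted first with a shufflevector.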
// CHECK-LABEL: @test_vshll_high_n_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 19);
}

// CHECK-LABEL: @test_vshll_high_n_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 19);
}

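// vmovl: lengthen, i.e. a bare sext/zext to the double-width type. The
// leading bitcast of %a in the 16- and 32-bit cases is left dead after
// mem2reg.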
// CHECK-LABEL: @test_vmovl_s8(
// CHECK:   [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: @test_vmovl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: @test_vmovl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: @test_vmovl_u8(
// CHECK:   [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: @test_vmovl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: @test_vmovl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

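// vmovl_high: extract the upper half with a shufflevector, then sext/zext.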
// CHECK-LABEL: @test_vmovl_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
  return vmovl_high_s8(a);
}

// CHECK-LABEL: @test_vmovl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
  return vmovl_high_s16(a);
}

// CHECK-LABEL: @test_vmovl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP1]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
  return vmovl_high_s32(a);
}

// CHECK-LABEL: @test_vmovl_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
  return vmovl_high_u8(a);
}

// CHECK-LABEL: @test_vmovl_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
  return vmovl_high_u16(a);
}

// CHECK-LABEL: @test_vmovl_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP1]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
  return vmovl_high_u32(a);
}

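// vcvt_n / vcvtq_n: fixed-point conversions where the immediate gives the
// number of fractional bits. Lowered to @llvm.aarch64.neon.vcvtfxs2fp /
// vcvtfxu2fp for int-to-float, and @llvm.aarch64.neon.vcvtfp2fxs / vcvtfp2fxu
// for float-to-int.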
6465 // CHECK-LABEL: @test_vcvt_n_f32_s32(
6466 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6467 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6468 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6469 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_s32(int32x2_t a)6470 float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
6471   return vcvt_n_f32_s32(a, 31);
6472 }
6473 
6474 // CHECK-LABEL: @test_vcvtq_n_f32_s32(
6475 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6476 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6477 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6478 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_s32(int32x4_t a)6479 float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
6480   return vcvtq_n_f32_s32(a, 31);
6481 }
6482 
6483 // CHECK-LABEL: @test_vcvtq_n_f64_s64(
6484 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6485 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6486 // CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6487 // CHECK:   ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_s64(int64x2_t a)6488 float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
6489   return vcvtq_n_f64_s64(a, 50);
6490 }
6491 
6492 // CHECK-LABEL: @test_vcvt_n_f32_u32(
6493 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6494 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6495 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6496 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_u32(uint32x2_t a)6497 float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
6498   return vcvt_n_f32_u32(a, 31);
6499 }
6500 
6501 // CHECK-LABEL: @test_vcvtq_n_f32_u32(
6502 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6503 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6504 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6505 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_u32(uint32x4_t a)6506 float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
6507   return vcvtq_n_f32_u32(a, 31);
6508 }
6509 
6510 // CHECK-LABEL: @test_vcvtq_n_f64_u64(
6511 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6512 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6513 // CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6514 // CHECK:   ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_u64(uint64x2_t a)6515 float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
6516   return vcvtq_n_f64_u64(a, 50);
6517 }
6518 
// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}

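// vaddl_* is a widening add: both operands are sign- or zero-extended to twice
// their element width before the add, so the sum cannot wrap.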
// CHECK-LABEL: @test_vaddl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

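// The _high variants read the upper halves of 128-bit inputs; in IR this shows
// up as a shufflevector selecting the top lanes ahead of the extend.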
// CHECK-LABEL: @test_vaddl_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vaddl_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vaddl_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vaddl_high_u32(a, b);
}

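// vaddw_* widens only the second operand; the first already has the result
// width (e.g. <8 x i16> plus the sign-extension of <8 x i8>).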
// CHECK-LABEL: @test_vaddw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

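// As above, the _high forms extract the top lanes of %b before widening.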
// CHECK-LABEL: @test_vaddw_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}

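// vsubl_* mirrors vaddl_*: both operands are widened first, then subtracted.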
// CHECK-LABEL: @test_vsubl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

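// Again, the _high variants subtract the widened upper halves of the inputs.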
// CHECK-LABEL: @test_vsubl_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}

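// vsubw_* subtracts a widened narrow operand from a full-width first operand.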
// CHECK-LABEL: @test_vsubw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

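// The _high forms widen only the top lanes of %b before the subtraction.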
// CHECK-LABEL: @test_vsubw_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}

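// vaddhn_* (add returning high narrow) adds full-width lanes and keeps the
// high half of each sum: for 16-bit lanes, (a + b) >> 8 truncated to i8.
// Informally, lanes a = 0x1234 and b = 0x0101 give (0x1335 >> 8) = 0x13.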
// CHECK-LABEL: @test_vaddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: @test_vaddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: @test_vaddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: @test_vaddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: @test_vaddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: @test_vaddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

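// The _high forms place the narrowed sum in the upper half of the result and
// take the lower half from %r, hence the final shufflevector concatenation.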
// CHECK-LABEL: @test_vaddhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}

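// vraddhn_* is the rounding variant, lowered to @llvm.aarch64.neon.raddhn; it
// adds 1 << (half-width - 1) before taking the high half. Informally, with
// 16-bit lanes a = b = 0x70: vaddhn yields 0xE0 >> 8 = 0, while vraddhn
// yields (0xE0 + 0x80) >> 8 = 1.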
// CHECK-LABEL: @test_vraddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

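// Rounding add-narrow with the result concatenated onto %r, as before.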
// CHECK-LABEL: @test_vraddhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vraddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}

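// vsubhn_* is the subtractive counterpart: (a - b) >> half-width, truncated.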
// CHECK-LABEL: @test_vsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}

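// Narrowed differences land in the upper half of the result; %r supplies the
// lower half.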
// CHECK-LABEL: @test_vsubhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}

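// vrsubhn_* rounds before narrowing, lowered to @llvm.aarch64.neon.rsubhn.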
// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

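// vrsubhn_high: as vrsubhn, but the narrowed result is shuffled into the upper
// half of the destination, with %r providing the lower half.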
// CHECK-LABEL: @test_vrsubhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}

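// vabdl: widening absolute difference. The difference is computed at the
// narrow width (sabd/uabd) and then zero-extended, since |a - b| is
// non-negative even for the signed variants.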
// CHECK-LABEL: @test_vabdl_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}

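// vabal: widening absolute difference of %b and %c, accumulated into %a.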
// CHECK-LABEL: @test_vabal_s8(
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_u8(
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

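// The _high widening variants below take their narrow operands from the upper
// halves of 128-bit vectors, hence the extracting shufflevectors in the IR.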
// CHECK-LABEL: @test_vabdl_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}

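// vabal_high: absolute difference of the upper halves of %b and %c, widened
// and accumulated into %a.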
// CHECK-LABEL: @test_vabal_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}

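// vmull: widening multiply via the smull/umull intrinsics.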
// CHECK-LABEL: @test_vmull_s8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: @test_vmull_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: @test_vmull_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: @test_vmull_u8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: @test_vmull_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: @test_vmull_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

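// vmull_high: widening multiply of the upper halves.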
// CHECK-LABEL: @test_vmull_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}

// CHECK-LABEL: @test_vmull_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}

// CHECK-LABEL: @test_vmull_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}

// CHECK-LABEL: @test_vmull_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}

// CHECK-LABEL: @test_vmull_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}

// CHECK-LABEL: @test_vmull_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}

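// vmlal: widening multiply-accumulate (a + b * c).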
// CHECK-LABEL: @test_vmlal_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

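// vmlal_high: widening multiply-accumulate using the upper halves of %b and %c.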
// CHECK-LABEL: @test_vmlal_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}

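// vmlsl: widening multiply-subtract (a - b * c).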
// CHECK-LABEL: @test_vmlsl_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

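// vmlsl_high: widening multiply-subtract using the upper halves of %b and %c.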
// CHECK-LABEL: @test_vmlsl_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}

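// vqdmull: signed saturating doubling widening multiply; signed element types only.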
// CHECK-LABEL: @test_vqdmull_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

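// vqdmlal: saturating doubling multiply of %b and %c, accumulated into %a with
// a saturating add (sqadd).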
// CHECK-LABEL: @test_vqdmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

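// vqdmlsl: saturating doubling multiply of %b and %c, subtracted from %a with
// a saturating subtract (sqsub).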
// CHECK-LABEL: @test_vqdmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

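// vqdmull_high: saturating doubling widening multiply of the upper halves.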
// CHECK-LABEL: @test_vqdmull_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I_I]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I_I]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}

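// vqdmlal_high: saturating doubling multiply-accumulate on the upper halves.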
// CHECK-LABEL: @test_vqdmlal_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}

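// vqdmlsl_high: saturating doubling multiply-subtract on the upper halves.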
8335 // CHECK-LABEL: @test_vqdmlsl_high_s16(
8336 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8337 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8338 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8339 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8340 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8341 // CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8342 // CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
8343 // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I_I]]
test_vqdmlsl_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)8344 int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
8345   return vqdmlsl_high_s16(a, b, c);
8346 }
8347 
8348 // CHECK-LABEL: @test_vqdmlsl_high_s32(
8349 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8350 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8351 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8352 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8353 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8354 // CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8355 // CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
8356 // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I_I]]
test_vqdmlsl_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)8357 int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
8358   return vqdmlsl_high_s32(a, b, c);
8359 }
8360 
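// Polynomial multiply: vmull_p8 maps to the PMULL intrinsic; the _high
// variant operates on the upper halves of its 128-bit operands.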
// CHECK-LABEL: @test_vmull_p8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

// CHECK-LABEL: @test_vmull_high_p8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}

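// Scalar add/subtract on single 64-bit values: these lower to plain i64
// arithmetic rather than a NEON intrinsic call.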
// CHECK-LABEL: @test_vaddd_s64(
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}

// CHECK-LABEL: @test_vaddd_u64(
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}

// CHECK-LABEL: @test_vsubd_s64(
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}

// CHECK-LABEL: @test_vsubd_u64(
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}

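// Scalar saturating add (SQADD/UQADD). For the i8/i16 element types there is
// no scalar form of the intrinsic, so the value is inserted into lane 0 of a
// vector, the vector intrinsic is applied, and lane 0 is extracted again;
// e.g. vqaddb_s8(127, 1) saturates to 127 instead of wrapping.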
// CHECK-LABEL: @test_vqaddb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}

// CHECK-LABEL: @test_vqaddh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}

// CHECK-LABEL: @test_vqadds_s32(
// CHECK:   [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}

// CHECK-LABEL: @test_vqaddd_s64(
// CHECK:   [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}

// CHECK-LABEL: @test_vqaddb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}

// CHECK-LABEL: @test_vqaddh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}

// CHECK-LABEL: @test_vqadds_u32(
// CHECK:   [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}

// CHECK-LABEL: @test_vqaddd_u64(
// CHECK:   [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}

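// Scalar saturating subtract (SQSUB/UQSUB), using the same lane-0
// insert/extract pattern for the i8/i16 element types.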
// CHECK-LABEL: @test_vqsubb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: @test_vqsubh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: @test_vqsubs_s32(
// CHECK:   [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: @test_vqsubd_s64(
// CHECK:   [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: @test_vqsubb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: @test_vqsubh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: @test_vqsubs_u32(
// CHECK:   [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: @test_vqsubd_u64(
// CHECK:   [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}

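// Scalar shifts: plain (vshld), saturating (vqshl*), rounding (vrshld), and
// saturating-rounding (vqrshl*) variants, signed and unsigned.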
// CHECK-LABEL: @test_vshld_s64(
// CHECK:   [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: @test_vshld_u64(
// CHECK:   [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}

// CHECK-LABEL: @test_vqshlb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqshlh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqshls_s32(
// CHECK:   [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: @test_vqshld_s64(
// CHECK:   [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: @test_vqshlb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqshlh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqshls_u32(
// CHECK:   [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: @test_vqshld_u64(
// CHECK:   [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}

// CHECK-LABEL: @test_vrshld_s64(
// CHECK:   [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}

// CHECK-LABEL: @test_vrshld_u64(
// CHECK:   [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqrshls_s32(
// CHECK:   [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: @test_vqrshld_s64(
// CHECK:   [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqrshls_u32(
// CHECK:   [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: @test_vqrshld_u64(
// CHECK:   [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}

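// Pairwise reductions across a two-element vector: integer add (via uaddv)
// and floating-point add/max/min, including the IEEE maxNum/minNum
// (fmaxnmv/fminnmv) forms.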
// CHECK-LABEL: @test_vpaddd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: @test_vpadds_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: @test_vpaddd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}

// CHECK-LABEL: @test_vpmaxnms_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: @test_vpmaxnmqd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmaxs_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: @test_vpmaxqd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: @test_vpminnms_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: @test_vpminnmqd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmins_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: @test_vpminqd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}

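// Scalar saturating doubling multiply-high (SQDMULH) and its rounding
// variant (SQRDMULH).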
// CHECK-LABEL: @test_vqdmulhh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhs_s32(
// CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhs_s32(
// CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}

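// Floating-point multiply-extended (FMULX) and the Newton-Raphson step
// intrinsics FRECPS/FRSQRTS.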
// CHECK-LABEL: @test_vmulxs_f32(
// CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
// CHECK:   ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: @test_vmulxd_f64(
// CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
// CHECK:   ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: @test_vmulx_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}

// CHECK-LABEL: @test_vrecpss_f32(
// CHECK:   [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
// CHECK:   ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsd_f64(
// CHECK:   [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
// CHECK:   ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrtss_f32(
// CHECK:   [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
// CHECK:   ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsd_f64(
// CHECK:   [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
// CHECK:   ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}

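// Scalar integer-to-floating-point conversions lower to plain sitofp/uitofp.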
// CHECK-LABEL: @test_vcvts_f32_s32(
// CHECK:   [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_s64(
// CHECK:   [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: @test_vcvts_f32_u32(
// CHECK:   [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_u64(
// CHECK:   [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}

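// Reciprocal and reciprocal-square-root estimates (FRECPE, FRECPX, URSQRTE,
// FRSQRTE) on scalar and vector operands.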
// CHECK-LABEL: @test_vrecpes_f32(
// CHECK:   [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
// CHECK:   ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: @test_vrecped_f64(
// CHECK:   [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
// CHECK:   ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: @test_vrecpxs_f32(
// CHECK:   [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
// CHECK:   ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: @test_vrecpxd_f64(
// CHECK:   [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
// CHECK:   ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}

// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: @test_vrsqrtes_f32(
// CHECK:   [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
// CHECK:   ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: @test_vrsqrted_f64(
// CHECK:   [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
// CHECK:   ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}

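// Single-register loads: vld1/vld1q become an ordinary vector load through a
// pointer bitcast, so no target intrinsic is emitted.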
// CHECK-LABEL: @test_vld1q_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: @test_vld1q_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: @test_vld1q_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: @test_vld1q_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: @test_vld1q_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: @test_vld1q_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: @test_vld1q_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: @test_vld1q_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: @test_vld1q_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK:   [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]]
// CHECK:   ret <8 x half> [[TMP2]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: @test_vld1q_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: @test_vld1q_f64(
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK:   [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: @test_vld1q_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: @test_vld1q_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: @test_vld1_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: @test_vld1_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: @test_vld1_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: @test_vld1_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: @test_vld1_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: @test_vld1_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: @test_vld1_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: @test_vld1_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: @test_vld1_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
// CHECK:   [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]]
// CHECK:   ret <4 x half> [[TMP2]]
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: @test_vld1_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: @test_vld1_f64(
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK:   [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
// CHECK:   ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: @test_vld1_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: @test_vld1_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}

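// Two-register structure loads: vld2/vld2q lower to llvm.aarch64.neon.ld2,
// whose two result vectors are stored into the NxM struct and copied to the
// return slot with llvm.memcpy.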
9196 // CHECK-LABEL: @test_vld2q_u8(
9197 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
9198 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
9199 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9200 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9201 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9202 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9203 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9204 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
9205 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9206 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9207 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
9208 // CHECK:   ret %struct.uint8x16x2_t [[TMP5]]
test_vld2q_u8(uint8_t const * a)9209 uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
9210   return vld2q_u8(a);
9211 }
9212 
9213 // CHECK-LABEL: @test_vld2q_u16(
9214 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
9215 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
9216 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9217 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9218 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9219 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9220 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9221 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9222 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
9223 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9224 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9225 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
9226 // CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
test_vld2q_u16(uint16_t const * a)9227 uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
9228   return vld2q_u16(a);
9229 }
9230 
9231 // CHECK-LABEL: @test_vld2q_u32(
9232 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
9233 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
9234 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9235 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9236 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9237 // CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9238 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9239 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9240 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
9241 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9242 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9243 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
9244 // CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
test_vld2q_u32(uint32_t const * a)9245 uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
9246   return vld2q_u32(a);
9247 }
9248 
9249 // CHECK-LABEL: @test_vld2q_u64(
9250 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
9251 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
9252 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9253 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9254 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9255 // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9256 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9257 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9258 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
9259 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9260 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9261 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
9262 // CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
test_vld2q_u64(uint64_t const * a)9263 uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
9264   return vld2q_u64(a);
9265 }
9266 
9267 // CHECK-LABEL: @test_vld2q_s8(
9268 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
9269 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
9270 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9271 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9272 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9273 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9274 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9275 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
9276 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9277 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9278 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
9279 // CHECK:   ret %struct.int8x16x2_t [[TMP5]]
test_vld2q_s8(int8_t const * a)9280 int8x16x2_t test_vld2q_s8(int8_t const *a) {
9281   return vld2q_s8(a);
9282 }
9283 
9284 // CHECK-LABEL: @test_vld2q_s16(
9285 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
9286 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
9287 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9288 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9289 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9290 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9291 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9292 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9293 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
9294 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9295 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9296 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
9297 // CHECK:   ret %struct.int16x8x2_t [[TMP6]]
test_vld2q_s16(int16_t const * a)9298 int16x8x2_t test_vld2q_s16(int16_t const *a) {
9299   return vld2q_s16(a);
9300 }
9301 
9302 // CHECK-LABEL: @test_vld2q_s32(
9303 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
9304 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
9305 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9306 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9307 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9308 // CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9309 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9310 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9311 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
9312 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9313 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9314 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
9315 // CHECK:   ret %struct.int32x4x2_t [[TMP6]]
test_vld2q_s32(int32_t const * a)9316 int32x4x2_t test_vld2q_s32(int32_t const *a) {
9317   return vld2q_s32(a);
9318 }
9319 
9320 // CHECK-LABEL: @test_vld2q_s64(
9321 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
9322 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
9323 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9324 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9325 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9326 // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9327 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9328 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9329 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
9330 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9331 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9332 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
9333 // CHECK:   ret %struct.int64x2x2_t [[TMP6]]
test_vld2q_s64(int64_t const * a)9334 int64x2x2_t test_vld2q_s64(int64_t const *a) {
9335   return vld2q_s64(a);
9336 }
9337 
9338 // CHECK-LABEL: @test_vld2q_f16(
9339 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
9340 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
9341 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9342 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
9343 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
9344 // CHECK:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* [[TMP2]])
9345 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
9346 // CHECK:   store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
9347 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
9348 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9349 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9350 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
9351 // CHECK:   ret %struct.float16x8x2_t [[TMP6]]
test_vld2q_f16(float16_t const * a)9352 float16x8x2_t test_vld2q_f16(float16_t const *a) {
9353   return vld2q_f16(a);
9354 }
9355 
9356 // CHECK-LABEL: @test_vld2q_f32(
9357 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
9358 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
9359 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9360 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
9361 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
9362 // CHECK:   [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
9363 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
9364 // CHECK:   store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
9365 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
9366 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9367 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9368 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
9369 // CHECK:   ret %struct.float32x4x2_t [[TMP6]]
test_vld2q_f32(float32_t const * a)9370 float32x4x2_t test_vld2q_f32(float32_t const *a) {
9371   return vld2q_f32(a);
9372 }
9373 
9374 // CHECK-LABEL: @test_vld2q_f64(
9375 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
9376 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
9377 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9378 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
9379 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
9380 // CHECK:   [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
9381 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
9382 // CHECK:   store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
9383 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
9384 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9385 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9386 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
9387 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
test_vld2q_f64(float64_t const * a)9388 float64x2x2_t test_vld2q_f64(float64_t const *a) {
9389   return vld2q_f64(a);
9390 }
9391 
9392 // CHECK-LABEL: @test_vld2q_p8(
9393 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
9394 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
9395 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9396 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9397 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9398 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9399 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9400 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
9401 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9402 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9403 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
9404 // CHECK:   ret %struct.poly8x16x2_t [[TMP5]]
test_vld2q_p8(poly8_t const * a)9405 poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
9406   return vld2q_p8(a);
9407 }
9408 
9409 // CHECK-LABEL: @test_vld2q_p16(
9410 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
9411 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
9412 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9413 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9414 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9415 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9416 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9417 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9418 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
9419 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9420 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9421 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
9422 // CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
test_vld2q_p16(poly16_t const * a)9423 poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
9424   return vld2q_p16(a);
9425 }
9426 
// CHECK-LABEL: @test_vld2_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x2_t [[TMP5]]
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
  return vld2_u8(a);
}

// CHECK-LABEL: @test_vld2_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
  return vld2_u16(a);
}

// CHECK-LABEL: @test_vld2_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
  return vld2_u32(a);
}

// CHECK-LABEL: @test_vld2_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
  return vld2_u64(a);
}

// CHECK-LABEL: @test_vld2_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x2_t [[TMP5]]
int8x8x2_t test_vld2_s8(int8_t const *a) {
  return vld2_s8(a);
}

// CHECK-LABEL: @test_vld2_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_s16(int16_t const *a) {
  return vld2_s16(a);
}

// CHECK-LABEL: @test_vld2_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_s32(int32_t const *a) {
  return vld2_s32(a);
}

// CHECK-LABEL: @test_vld2_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_s64(int64_t const *a) {
  return vld2_s64(a);
}

// CHECK-LABEL: @test_vld2_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
// CHECK:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
// CHECK:   store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_f16(float16_t const *a) {
  return vld2_f16(a);
}

// CHECK-LABEL: @test_vld2_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_f32(float32_t const *a) {
  return vld2_f32(a);
}

// CHECK-LABEL: @test_vld2_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld2_f64(float64_t const *a) {
  return vld2_f64(a);
}

// CHECK-LABEL: @test_vld2_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x2_t [[TMP5]]
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
  return vld2_p8(a);
}

// CHECK-LABEL: @test_vld2_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
  return vld2_p16(a);
}

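// vld3/vld3q load three de-interleaved vectors via @llvm.aarch64.neon.ld3; the
// 48-byte (q-form) or 24-byte (d-form) result struct is returned through the
// same alloca-plus-memcpy pattern checked above.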
// CHECK-LABEL: @test_vld3q_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x3_t [[TMP5]]
uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
  return vld3q_u8(a);
}

// CHECK-LABEL: @test_vld3q_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
  return vld3q_u16(a);
}

// CHECK-LABEL: @test_vld3q_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
  return vld3q_u32(a);
}

// CHECK-LABEL: @test_vld3q_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
  return vld3q_u64(a);
}

// CHECK-LABEL: @test_vld3q_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x3_t [[TMP5]]
int8x16x3_t test_vld3q_s8(int8_t const *a) {
  return vld3q_s8(a);
}

// CHECK-LABEL: @test_vld3q_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld3q_s16(int16_t const *a) {
  return vld3q_s16(a);
}

// CHECK-LABEL: @test_vld3q_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld3q_s32(int32_t const *a) {
  return vld3q_s32(a);
}

// CHECK-LABEL: @test_vld3q_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld3q_s64(int64_t const *a) {
  return vld3q_s64(a);
}

// CHECK-LABEL: @test_vld3q_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
// CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld3q_f16(float16_t const *a) {
  return vld3q_f16(a);
}

// CHECK-LABEL: @test_vld3q_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld3q_f32(float32_t const *a) {
  return vld3q_f32(a);
}

// CHECK-LABEL: @test_vld3q_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld3q_f64(float64_t const *a) {
  return vld3q_f64(a);
}

// CHECK-LABEL: @test_vld3q_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x3_t [[TMP5]]
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
  return vld3q_p8(a);
}

// CHECK-LABEL: @test_vld3q_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
  return vld3q_p16(a);
}

// CHECK-LABEL: @test_vld3_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x3_t [[TMP5]]
uint8x8x3_t test_vld3_u8(uint8_t const *a) {
  return vld3_u8(a);
}

// CHECK-LABEL: @test_vld3_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
  return vld3_u16(a);
}

// CHECK-LABEL: @test_vld3_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
  return vld3_u32(a);
}

// CHECK-LABEL: @test_vld3_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
  return vld3_u64(a);
}

// CHECK-LABEL: @test_vld3_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x3_t [[TMP5]]
int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}

// CHECK-LABEL: @test_vld3_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}

// CHECK-LABEL: @test_vld3_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}

// CHECK-LABEL: @test_vld3_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}

// CHECK-LABEL: @test_vld3_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}

// CHECK-LABEL: @test_vld3_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}

// CHECK-LABEL: @test_vld3_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}

// CHECK-LABEL: @test_vld3_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}

// CHECK-LABEL: @test_vld3_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}

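// vld4/vld4q load four de-interleaved vectors via @llvm.aarch64.neon.ld4,
// widening the returned struct to 64 bytes for the q-register forms.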
10120 // CHECK-LABEL: @test_vld4q_u8(
10121 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
10122 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
10123 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10124 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10125 // CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10126 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10127 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10128 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
10129 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10130 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10131 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
10132 // CHECK:   ret %struct.uint8x16x4_t [[TMP5]]
test_vld4q_u8(uint8_t const * a)10133 uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
10134   return vld4q_u8(a);
10135 }
10136 
10137 // CHECK-LABEL: @test_vld4q_u16(
10138 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
10139 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
10140 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10141 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10142 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10143 // CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10144 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10145 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10146 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
10147 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10148 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10149 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
10150 // CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
test_vld4q_u16(uint16_t const * a)10151 uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
10152   return vld4q_u16(a);
10153 }
10154 
10155 // CHECK-LABEL: @test_vld4q_u32(
10156 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
10157 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
10158 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
10159 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10160 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10161 // CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10162 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
10163 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10164 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
10165 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
10166 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10167 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
10168 // CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
test_vld4q_u32(uint32_t const * a)10169 uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
10170   return vld4q_u32(a);
10171 }
10172 
10173 // CHECK-LABEL: @test_vld4q_u64(
10174 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
10175 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
10176 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
10177 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10178 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10179 // CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10180 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
10181 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10182 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
10183 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
10184 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10185 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
10186 // CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
test_vld4q_u64(uint64_t const * a)10187 uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
10188   return vld4q_u64(a);
10189 }
10190 
10191 // CHECK-LABEL: @test_vld4q_s8(
10192 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
10193 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
10194 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
10195 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10196 // CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10197 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10198 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10199 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
10200 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
10201 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10202 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
10203 // CHECK:   ret %struct.int8x16x4_t [[TMP5]]
test_vld4q_s8(int8_t const * a)10204 int8x16x4_t test_vld4q_s8(int8_t const *a) {
10205   return vld4q_s8(a);
10206 }
10207 
10208 // CHECK-LABEL: @test_vld4q_s16(
10209 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
10210 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
10211 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
10212 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10213 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10214 // CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10215 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10216 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10217 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
10218 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
10219 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10220 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
10221 // CHECK:   ret %struct.int16x8x4_t [[TMP6]]
test_vld4q_s16(int16_t const * a)10222 int16x8x4_t test_vld4q_s16(int16_t const *a) {
10223   return vld4q_s16(a);
10224 }
10225 
10226 // CHECK-LABEL: @test_vld4q_s32(
10227 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
10228 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
10229 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
10230 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10231 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10232 // CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10233 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
10234 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10235 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
10236 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
10237 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10238 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
10239 // CHECK:   ret %struct.int32x4x4_t [[TMP6]]
test_vld4q_s32(int32_t const * a)10240 int32x4x4_t test_vld4q_s32(int32_t const *a) {
10241   return vld4q_s32(a);
10242 }
10243 
10244 // CHECK-LABEL: @test_vld4q_s64(
10245 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
10246 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
10247 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
10248 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10249 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10250 // CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10251 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
10252 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10253 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
10254 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
10255 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10256 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
10257 // CHECK:   ret %struct.int64x2x4_t [[TMP6]]
10258 int64x2x4_t test_vld4q_s64(int64_t const *a) {
10259   return vld4q_s64(a);
10260 }
10261 
10262 // CHECK-LABEL: @test_vld4q_f16(
10263 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
10264 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
10265 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
10266 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
10267 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
10268 // CHECK:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* [[TMP2]])
10269 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
10270 // CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
10271 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
10272 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
10273 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10274 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
10275 // CHECK:   ret %struct.float16x8x4_t [[TMP6]]
10276 float16x8x4_t test_vld4q_f16(float16_t const *a) {
10277   return vld4q_f16(a);
10278 }
10279 
10280 // CHECK-LABEL: @test_vld4q_f32(
10281 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
10282 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
10283 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
10284 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
10285 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
10286 // CHECK:   [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
10287 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
10288 // CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
10289 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
10290 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
10291 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10292 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
10293 // CHECK:   ret %struct.float32x4x4_t [[TMP6]]
10294 float32x4x4_t test_vld4q_f32(float32_t const *a) {
10295   return vld4q_f32(a);
10296 }
10297 
10298 // CHECK-LABEL: @test_vld4q_f64(
10299 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
10300 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
10301 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
10302 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
10303 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
10304 // CHECK:   [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
10305 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
10306 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
10307 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
10308 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
10309 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10310 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
10311 // CHECK:   ret %struct.float64x2x4_t [[TMP6]]
10312 float64x2x4_t test_vld4q_f64(float64_t const *a) {
10313   return vld4q_f64(a);
10314 }
10315 
10316 // CHECK-LABEL: @test_vld4q_p8(
10317 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
10318 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
10319 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
10320 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10321 // CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10322 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10323 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10324 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
10325 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
10326 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10327 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
10328 // CHECK:   ret %struct.poly8x16x4_t [[TMP5]]
10329 poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
10330   return vld4q_p8(a);
10331 }
10332 
10333 // CHECK-LABEL: @test_vld4q_p16(
10334 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
10335 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
10336 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
10337 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10338 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10339 // CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10340 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10341 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10342 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
10343 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
10344 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10345 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
10346 // CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
10347 poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
10348   return vld4q_p16(a);
10349 }
10350 
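// A hedged usage sketch (not FileCheck-verified; rgba_to_planes is a
// hypothetical helper, not part of this test): the vld4q forms above
// de-interleave 4*N contiguous elements, so component k, lane i of the
// result holds element 4*i + k of memory. That makes vld4q_u8 a natural
// fit for splitting interleaved RGBA pixels into planes:
static void rgba_to_planes(const uint8_t *rgba, uint8_t *r, uint8_t *g,
                           uint8_t *b, uint8_t *alpha) {
  uint8x16x4_t px = vld4q_u8(rgba); // 16 pixels: val[0]=R, ..., val[3]=A
  vst1q_u8(r, px.val[0]);
  vst1q_u8(g, px.val[1]);
  vst1q_u8(b, px.val[2]);
  vst1q_u8(alpha, px.val[3]);
}
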
10351 // CHECK-LABEL: @test_vld4_u8(
10352 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
10353 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
10354 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10355 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10356 // CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10357 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10358 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10359 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
10360 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10361 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10362 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
10363 // CHECK:   ret %struct.uint8x8x4_t [[TMP5]]
10364 uint8x8x4_t test_vld4_u8(uint8_t const *a) {
10365   return vld4_u8(a);
10366 }
10367 
10368 // CHECK-LABEL: @test_vld4_u16(
10369 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
10370 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
10371 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10372 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10373 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10374 // CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10375 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10376 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10377 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
10378 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10379 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10380 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
10381 // CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
10382 uint16x4x4_t test_vld4_u16(uint16_t const *a) {
10383   return vld4_u16(a);
10384 }
10385 
10386 // CHECK-LABEL: @test_vld4_u32(
10387 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
10388 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
10389 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10390 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10391 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10392 // CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10393 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10394 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10395 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
10396 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10397 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10398 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
10399 // CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
10400 uint32x2x4_t test_vld4_u32(uint32_t const *a) {
10401   return vld4_u32(a);
10402 }
10403 
10404 // CHECK-LABEL: @test_vld4_u64(
10405 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
10406 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
10407 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10408 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10409 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10410 // CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10411 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10412 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10413 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
10414 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10415 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10416 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
10417 // CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
10418 uint64x1x4_t test_vld4_u64(uint64_t const *a) {
10419   return vld4_u64(a);
10420 }
10421 
10422 // CHECK-LABEL: @test_vld4_s8(
10423 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
10424 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
10425 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10426 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10427 // CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10428 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10429 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10430 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
10431 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10432 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10433 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
10434 // CHECK:   ret %struct.int8x8x4_t [[TMP5]]
10435 int8x8x4_t test_vld4_s8(int8_t const *a) {
10436   return vld4_s8(a);
10437 }
10438 
10439 // CHECK-LABEL: @test_vld4_s16(
10440 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
10441 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
10442 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10443 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10444 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10445 // CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10446 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10447 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10448 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
10449 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10450 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10451 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
10452 // CHECK:   ret %struct.int16x4x4_t [[TMP6]]
10453 int16x4x4_t test_vld4_s16(int16_t const *a) {
10454   return vld4_s16(a);
10455 }
10456 
10457 // CHECK-LABEL: @test_vld4_s32(
10458 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
10459 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
10460 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10461 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10462 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10463 // CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10464 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10465 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10466 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
10467 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10468 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10469 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
10470 // CHECK:   ret %struct.int32x2x4_t [[TMP6]]
10471 int32x2x4_t test_vld4_s32(int32_t const *a) {
10472   return vld4_s32(a);
10473 }
10474 
10475 // CHECK-LABEL: @test_vld4_s64(
10476 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
10477 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
10478 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10479 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10480 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10481 // CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10482 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10483 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10484 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
10485 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10486 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10487 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
10488 // CHECK:   ret %struct.int64x1x4_t [[TMP6]]
10489 int64x1x4_t test_vld4_s64(int64_t const *a) {
10490   return vld4_s64(a);
10491 }
10492 
10493 // CHECK-LABEL: @test_vld4_f16(
10494 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
10495 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
10496 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10497 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
10498 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
10499 // CHECK:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* [[TMP2]])
10500 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
10501 // CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
10502 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
10503 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10504 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10505 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
10506 // CHECK:   ret %struct.float16x4x4_t [[TMP6]]
10507 float16x4x4_t test_vld4_f16(float16_t const *a) {
10508   return vld4_f16(a);
10509 }
10510 
10511 // CHECK-LABEL: @test_vld4_f32(
10512 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
10513 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
10514 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
10515 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
10516 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
10517 // CHECK:   [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
10518 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
10519 // CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
10520 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
10521 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
10522 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10523 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
10524 // CHECK:   ret %struct.float32x2x4_t [[TMP6]]
10525 float32x2x4_t test_vld4_f32(float32_t const *a) {
10526   return vld4_f32(a);
10527 }
10528 
10529 // CHECK-LABEL: @test_vld4_f64(
10530 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
10531 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
10532 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
10533 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
10534 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
10535 // CHECK:   [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
10536 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
10537 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
10538 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
10539 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
10540 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10541 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
10542 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
10543 float64x1x4_t test_vld4_f64(float64_t const *a) {
10544   return vld4_f64(a);
10545 }
10546 
10547 // CHECK-LABEL: @test_vld4_p8(
10548 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
10549 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
10550 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
10551 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10552 // CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10553 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10554 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10555 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
10556 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
10557 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10558 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
10559 // CHECK:   ret %struct.poly8x8x4_t [[TMP5]]
10560 poly8x8x4_t test_vld4_p8(poly8_t const *a) {
10561   return vld4_p8(a);
10562 }
10563 
10564 // CHECK-LABEL: @test_vld4_p16(
10565 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
10566 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
10567 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
10568 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10569 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10570 // CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10571 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10572 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10573 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
10574 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
10575 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10576 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
10577 // CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
10578 poly16x4x4_t test_vld4_p16(poly16_t const *a) {
10579   return vld4_p16(a);
10580 }
10581 
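// The 64-bit (d-register) vld4 forms above apply the same de-interleave
// rule with fewer lanes. A hedged scalar model of what vld4_s16 computes
// (illustrative only; vld4_s16_model is a hypothetical name):
static void vld4_s16_model(const int16_t *p, int16_t out[4][4]) {
  for (int k = 0; k < 4; ++k)   // out[k] models the intrinsic's val[k]
    for (int i = 0; i < 4; ++i) // lane index within each d-register
      out[k][i] = p[4 * i + k]; // stride-4 gather from interleaved memory
}
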
10582 // CHECK-LABEL: @test_vst1q_u8(
10583 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10584 // CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
10585 // CHECK:   ret void
10586 void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
10587   vst1q_u8(a, b);
10588 }
10589 
10590 // CHECK-LABEL: @test_vst1q_u16(
10591 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10592 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10593 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10594 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10595 // CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10596 // CHECK:   ret void
10597 void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
10598   vst1q_u16(a, b);
10599 }
10600 
10601 // CHECK-LABEL: @test_vst1q_u32(
10602 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10603 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10604 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
10605 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10606 // CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
10607 // CHECK:   ret void
10608 void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
10609   vst1q_u32(a, b);
10610 }
10611 
10612 // CHECK-LABEL: @test_vst1q_u64(
10613 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10614 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10615 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
10616 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10617 // CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
10618 // CHECK:   ret void
10619 void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
10620   vst1q_u64(a, b);
10621 }
10622 
10623 // CHECK-LABEL: @test_vst1q_s8(
10624 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10625 // CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
10626 // CHECK:   ret void
10627 void test_vst1q_s8(int8_t *a, int8x16_t b) {
10628   vst1q_s8(a, b);
10629 }
10630 
10631 // CHECK-LABEL: @test_vst1q_s16(
10632 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10633 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10634 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10635 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10636 // CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10637 // CHECK:   ret void
10638 void test_vst1q_s16(int16_t *a, int16x8_t b) {
10639   vst1q_s16(a, b);
10640 }
10641 
10642 // CHECK-LABEL: @test_vst1q_s32(
10643 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10644 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10645 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
10646 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10647 // CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
10648 // CHECK:   ret void
10649 void test_vst1q_s32(int32_t *a, int32x4_t b) {
10650   vst1q_s32(a, b);
10651 }
10652 
10653 // CHECK-LABEL: @test_vst1q_s64(
10654 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10655 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10656 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
10657 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10658 // CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
10659 // CHECK:   ret void
10660 void test_vst1q_s64(int64_t *a, int64x2_t b) {
10661   vst1q_s64(a, b);
10662 }
10663 
10664 // CHECK-LABEL: @test_vst1q_f16(
10665 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
10666 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
10667 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
10668 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
10669 // CHECK:   store <8 x half> [[TMP3]], <8 x half>* [[TMP2]]
10670 // CHECK:   ret void
10671 void test_vst1q_f16(float16_t *a, float16x8_t b) {
10672   vst1q_f16(a, b);
10673 }
10674 
10675 // CHECK-LABEL: @test_vst1q_f32(
10676 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
10677 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
10678 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
10679 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
10680 // CHECK:   store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
10681 // CHECK:   ret void
10682 void test_vst1q_f32(float32_t *a, float32x4_t b) {
10683   vst1q_f32(a, b);
10684 }
10685 
10686 // CHECK-LABEL: @test_vst1q_f64(
10687 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
10688 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
10689 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
10690 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
10691 // CHECK:   store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
10692 // CHECK:   ret void
10693 void test_vst1q_f64(float64_t *a, float64x2_t b) {
10694   vst1q_f64(a, b);
10695 }
10696 
10697 // CHECK-LABEL: @test_vst1q_p8(
10698 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10699 // CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
10700 // CHECK:   ret void
10701 void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
10702   vst1q_p8(a, b);
10703 }
10704 
10705 // CHECK-LABEL: @test_vst1q_p16(
10706 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10707 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10708 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10709 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10710 // CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10711 // CHECK:   ret void
10712 void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
10713   vst1q_p16(a, b);
10714 }
10715 
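// Unlike the vld4/vst4 pairs, the vst1q forms above are plain contiguous
// stores of one q-register, so a vld1q/vst1q pair amounts to a 16-byte
// copy. Hedged sketch (copy16 is hypothetical, not part of this test):
static void copy16(uint8_t *dst, const uint8_t *src) {
  vst1q_u8(dst, vld1q_u8(src)); // load 16 bytes, store 16 bytes, no shuffle
}
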
10716 // CHECK-LABEL: @test_vst1_u8(
10717 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10718 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
10719 // CHECK:   ret void
10720 void test_vst1_u8(uint8_t *a, uint8x8_t b) {
10721   vst1_u8(a, b);
10722 }
10723 
10724 // CHECK-LABEL: @test_vst1_u16(
10725 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10726 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10727 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10728 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10729 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10730 // CHECK:   ret void
10731 void test_vst1_u16(uint16_t *a, uint16x4_t b) {
10732   vst1_u16(a, b);
10733 }
10734 
10735 // CHECK-LABEL: @test_vst1_u32(
10736 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10737 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10738 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10739 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10740 // CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
10741 // CHECK:   ret void
10742 void test_vst1_u32(uint32_t *a, uint32x2_t b) {
10743   vst1_u32(a, b);
10744 }
10745 
10746 // CHECK-LABEL: @test_vst1_u64(
10747 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10748 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10749 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10750 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10751 // CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
10752 // CHECK:   ret void
10753 void test_vst1_u64(uint64_t *a, uint64x1_t b) {
10754   vst1_u64(a, b);
10755 }
10756 
10757 // CHECK-LABEL: @test_vst1_s8(
10758 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10759 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
10760 // CHECK:   ret void
10761 void test_vst1_s8(int8_t *a, int8x8_t b) {
10762   vst1_s8(a, b);
10763 }
10764 
10765 // CHECK-LABEL: @test_vst1_s16(
10766 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10767 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10768 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10769 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10770 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10771 // CHECK:   ret void
10772 void test_vst1_s16(int16_t *a, int16x4_t b) {
10773   vst1_s16(a, b);
10774 }
10775 
10776 // CHECK-LABEL: @test_vst1_s32(
10777 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10778 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10779 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10780 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10781 // CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
10782 // CHECK:   ret void
10783 void test_vst1_s32(int32_t *a, int32x2_t b) {
10784   vst1_s32(a, b);
10785 }
10786 
10787 // CHECK-LABEL: @test_vst1_s64(
10788 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10789 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10790 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10791 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10792 // CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
10793 // CHECK:   ret void
10794 void test_vst1_s64(int64_t *a, int64x1_t b) {
10795   vst1_s64(a, b);
10796 }
10797 
10798 // CHECK-LABEL: @test_vst1_f16(
10799 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
10800 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
10801 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
10802 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
10803 // CHECK:   store <4 x half> [[TMP3]], <4 x half>* [[TMP2]]
10804 // CHECK:   ret void
10805 void test_vst1_f16(float16_t *a, float16x4_t b) {
10806   vst1_f16(a, b);
10807 }
10808 
10809 // CHECK-LABEL: @test_vst1_f32(
10810 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
10811 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
10812 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
10813 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
10814 // CHECK:   store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
10815 // CHECK:   ret void
10816 void test_vst1_f32(float32_t *a, float32x2_t b) {
10817   vst1_f32(a, b);
10818 }
10819 
10820 // CHECK-LABEL: @test_vst1_f64(
10821 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
10822 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
10823 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
10824 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
10825 // CHECK:   store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
10826 // CHECK:   ret void
10827 void test_vst1_f64(float64_t *a, float64x1_t b) {
10828   vst1_f64(a, b);
10829 }
10830 
10831 // CHECK-LABEL: @test_vst1_p8(
10832 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10833 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
10834 // CHECK:   ret void
10835 void test_vst1_p8(poly8_t *a, poly8x8_t b) {
10836   vst1_p8(a, b);
10837 }
10838 
10839 // CHECK-LABEL: @test_vst1_p16(
10840 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10841 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10842 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10843 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10844 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10845 // CHECK:   ret void
10846 void test_vst1_p16(poly16_t *a, poly16x4_t b) {
10847   vst1_p16(a, b);
10848 }
10849 
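// The vst2q tests below exercise the interleaving stores: st2 writes lane i
// of val[0] to memory slot 2*i and lane i of val[1] to slot 2*i+1. A hedged
// sketch that zips two 16-byte planes (zip_bytes is a hypothetical helper):
static void zip_bytes(uint8_t *dst, const uint8_t *even, const uint8_t *odd) {
  uint8x16x2_t v = {{vld1q_u8(even), vld1q_u8(odd)}};
  vst2q_u8(dst, v); // dst[2*i] = even[i], dst[2*i+1] = odd[i]
}
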
10850 // CHECK-LABEL: @test_vst2q_u8(
10851 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
10852 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
10853 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
10854 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10855 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
10856 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
10857 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10858 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10859 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10860 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10861 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10862 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10863 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10864 // CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10865 // CHECK:   ret void
10866 void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
10867   vst2q_u8(a, b);
10868 }
10869 
10870 // CHECK-LABEL: @test_vst2q_u16(
10871 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
10872 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
10873 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
10874 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10875 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
10876 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
10877 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10878 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
10879 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10880 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10881 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10882 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10883 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10884 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10885 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10886 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10887 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10888 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10889 // CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10890 // CHECK:   ret void
10891 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
10892   vst2q_u16(a, b);
10893 }
10894 
10895 // CHECK-LABEL: @test_vst2q_u32(
10896 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
10897 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
10898 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
10899 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10900 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
10901 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
10902 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10903 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
10904 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10905 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
10906 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
10907 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10908 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10909 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
10910 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
10911 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10912 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10913 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10914 // CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
10915 // CHECK:   ret void
10916 void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
10917   vst2q_u32(a, b);
10918 }
10919 
10920 // CHECK-LABEL: @test_vst2q_u64(
10921 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
10922 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
10923 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
10924 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
10925 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
10926 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
10927 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10928 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
10929 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10930 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
10931 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
10932 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10933 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10934 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
10935 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
10936 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10937 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10938 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10939 // CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
10940 // CHECK:   ret void
10941 void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
10942   vst2q_u64(a, b);
10943 }
10944 
10945 // CHECK-LABEL: @test_vst2q_s8(
10946 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
10947 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
10948 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
10949 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10950 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
10951 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
10952 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10953 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10954 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10955 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10956 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10957 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10958 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10959 // CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10960 // CHECK:   ret void
10961 void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
10962   vst2q_s8(a, b);
10963 }
10964 
10965 // CHECK-LABEL: @test_vst2q_s16(
10966 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
10967 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
10968 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
10969 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10970 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
10971 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
10972 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10973 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
10974 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10975 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10976 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10977 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10978 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10979 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10980 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10981 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10982 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10983 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10984 // CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10985 // CHECK:   ret void
10986 void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
10987   vst2q_s16(a, b);
10988 }
10989 
10990 // CHECK-LABEL: @test_vst2q_s32(
10991 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
10992 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
10993 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
10994 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10995 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
10996 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
10997 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10998 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
10999 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
11000 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
11001 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11002 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11003 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
11004 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11005 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11006 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11007 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11008 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11009 // CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
11010 // CHECK:   ret void
11011 void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
11012   vst2q_s32(a, b);
11013 }
11014 
11015 // CHECK-LABEL: @test_vst2q_s64(
11016 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
11017 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
11018 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
11019 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
11020 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
11021 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
11022 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11023 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11024 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11025 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
11026 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11027 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11028 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11029 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11030 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11031 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11032 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11033 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11034 // CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
11035 // CHECK:   ret void
11036 void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
11037   vst2q_s64(a, b);
11038 }
11039 
11040 // CHECK-LABEL: @test_vst2q_f16(
11041 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
11042 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
11043 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
11044 // CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
11045 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
11046 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
11047 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11048 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
11049 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11050 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
11051 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11052 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11053 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11054 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
11055 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11056 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11057 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11058 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11059 // CHECK:   call void @llvm.aarch64.neon.st2.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i8* [[TMP2]])
11060 // CHECK:   ret void
11061 void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
11062   vst2q_f16(a, b);
11063 }
11064 
11065 // CHECK-LABEL: @test_vst2q_f32(
11066 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
11067 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
11068 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
11069 // CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
11070 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
11071 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
11072 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11073 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
11074 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11075 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
11076 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11077 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11078 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11079 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
11080 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11081 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11082 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11083 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11084 // CHECK:   call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
11085 // CHECK:   ret void
11086 void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
11087   vst2q_f32(a, b);
11088 }
11089 
11090 // CHECK-LABEL: @test_vst2q_f64(
11091 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
11092 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
11093 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
11094 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
11095 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
11096 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
11097 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11098 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
11099 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11100 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
11101 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11102 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11103 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11104 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
11105 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11106 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11107 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11108 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11109 // CHECK:   call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
11110 // CHECK:   ret void
test_vst2q_f64(float64_t * a,float64x2x2_t b)11111 void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
11112   vst2q_f64(a, b);
11113 }
11114 
11115 // CHECK-LABEL: @test_vst2q_p8(
11116 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
11117 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
11118 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
11119 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11120 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
11121 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
11122 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11123 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11124 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11125 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11126 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11127 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11128 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11129 // CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11130 // CHECK:   ret void
test_vst2q_p8(poly8_t * a,poly8x16x2_t b)11131 void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
11132   vst2q_p8(a, b);
11133 }
11134 
11135 // CHECK-LABEL: @test_vst2q_p16(
11136 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
11137 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
11138 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
11139 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11140 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
11141 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
11142 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11143 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11144 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11145 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11146 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11147 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11148 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11149 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11150 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11151 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11152 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11153 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11154 // CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11155 // CHECK:   ret void
test_vst2q_p16(poly16_t * a,poly16x8x2_t b)11156 void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
11157   vst2q_p16(a, b);
11158 }
11159 
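// The vst2 tests below cover the 64-bit (D-register) variants: each
// two-vector struct is 16 bytes with 8-byte alignment, so the argument is
// coerced to a [2 x <n x ty>] array and copied with a 16-byte memcpy
// before the @llvm.aarch64.neon.st2 call.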
// CHECK-LABEL: @test_vst2_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}

// CHECK-LABEL: @test_vst2_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}

// CHECK-LABEL: @test_vst2_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}

// CHECK-LABEL: @test_vst2_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}

// CHECK-LABEL: @test_vst2_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}

// CHECK-LABEL: @test_vst2_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}

// CHECK-LABEL: @test_vst2_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}

// CHECK-LABEL: @test_vst2_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}

// CHECK-LABEL: @test_vst2_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   call void @llvm.aarch64.neon.st2.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}

// CHECK-LABEL: @test_vst2_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}

// CHECK-LABEL: @test_vst2_f64(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}

// CHECK-LABEL: @test_vst2_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: @test_vst2_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

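// The vst3 tests below store three vectors with element interleaving via
// @llvm.aarch64.neon.st3; the Q-register structs are 48 bytes, so the
// coerced [3 x <n x ty>] array is copied with a 48-byte memcpy.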
11470 // CHECK-LABEL: @test_vst3q_u8(
11471 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
11472 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
11473 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
11474 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11475 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
11476 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
11477 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11478 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11479 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11480 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11481 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11482 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11483 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11484 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11485 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11486 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11487 // CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11488 // CHECK:   ret void
test_vst3q_u8(uint8_t * a,uint8x16x3_t b)11489 void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
11490   vst3q_u8(a, b);
11491 }
11492 
11493 // CHECK-LABEL: @test_vst3q_u16(
11494 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
11495 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
11496 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
11497 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11498 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
11499 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
11500 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11501 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11502 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11503 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11504 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11505 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11506 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11507 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11508 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11509 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11510 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11511 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11512 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11513 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11514 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11515 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11516 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11517 // CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11518 // CHECK:   ret void
test_vst3q_u16(uint16_t * a,uint16x8x3_t b)11519 void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
11520   vst3q_u16(a, b);
11521 }
11522 
11523 // CHECK-LABEL: @test_vst3q_u32(
11524 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
11525 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
11526 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
11527 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
11528 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
11529 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
11530 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11531 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
11532 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11533 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
11534 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11535 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11536 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11537 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11538 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11539 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11540 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11541 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
11542 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
11543 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11544 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11545 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11546 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11547 // CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
11548 // CHECK:   ret void
test_vst3q_u32(uint32_t * a,uint32x4x3_t b)11549 void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
11550   vst3q_u32(a, b);
11551 }
11552 
11553 // CHECK-LABEL: @test_vst3q_u64(
11554 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
11555 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
11556 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
11557 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
11558 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
11559 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
11560 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11561 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11562 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11563 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
11564 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11565 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11566 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11567 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11568 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11569 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11570 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11571 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
11572 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
11573 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11574 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11575 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11576 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11577 // CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
11578 // CHECK:   ret void
test_vst3q_u64(uint64_t * a,uint64x2x3_t b)11579 void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
11580   vst3q_u64(a, b);
11581 }
11582 
11583 // CHECK-LABEL: @test_vst3q_s8(
11584 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
11585 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
11586 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
11587 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11588 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
11589 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
11590 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11591 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11592 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11593 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11594 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11595 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11596 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11597 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11598 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11599 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11600 // CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11601 // CHECK:   ret void
test_vst3q_s8(int8_t * a,int8x16x3_t b)11602 void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
11603   vst3q_s8(a, b);
11604 }
11605 
11606 // CHECK-LABEL: @test_vst3q_s16(
11607 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
11608 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
11609 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
11610 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11611 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
11612 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
11613 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11614 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11615 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11616 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11617 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11618 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11619 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11620 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11621 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11622 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11623 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11624 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11625 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11626 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11627 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11628 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11629 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11630 // CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11631 // CHECK:   ret void
test_vst3q_s16(int16_t * a,int16x8x3_t b)11632 void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
11633   vst3q_s16(a, b);
11634 }
11635 
11636 // CHECK-LABEL: @test_vst3q_s32(
11637 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
11638 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
11639 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
11640 // CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
11641 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
11642 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
11643 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11644 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
11645 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11646 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
11647 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11648 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11649 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11650 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11651 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11652 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11653 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11654 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
11655 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
11656 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11657 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11658 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11659 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11660 // CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
11661 // CHECK:   ret void
test_vst3q_s32(int32_t * a,int32x4x3_t b)11662 void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
11663   vst3q_s32(a, b);
11664 }
11665 
11666 // CHECK-LABEL: @test_vst3q_s64(
11667 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
11668 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
11669 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
11670 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
11671 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
11672 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
11673 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11674 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11675 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11676 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
11677 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11678 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11679 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11680 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11681 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11682 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11683 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11684 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
11685 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
11686 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11687 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11688 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11689 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11690 // CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
11691 // CHECK:   ret void
test_vst3q_s64(int64_t * a,int64x2x3_t b)11692 void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
11693   vst3q_s64(a, b);
11694 }
11695 
11696 // CHECK-LABEL: @test_vst3q_f16(
11697 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
11698 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
11699 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
11700 // CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
11701 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
11702 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
11703 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11704 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
11705 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11706 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
11707 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11708 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11709 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11710 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
11711 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11712 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11713 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11714 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
11715 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
11716 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11717 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11718 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11719 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11720 // CHECK:   call void @llvm.aarch64.neon.st3.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i8* [[TMP2]])
11721 // CHECK:   ret void
test_vst3q_f16(float16_t * a,float16x8x3_t b)11722 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
11723   vst3q_f16(a, b);
11724 }
11725 
11726 // CHECK-LABEL: @test_vst3q_f32(
11727 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
11728 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
11729 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
11730 // CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
11731 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
11732 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
11733 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11734 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
11735 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11736 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
11737 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11738 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11739 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11740 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
11741 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11742 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11743 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11744 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
11745 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
11746 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11747 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11748 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11749 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11750 // CHECK:   call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
11751 // CHECK:   ret void
test_vst3q_f32(float32_t * a,float32x4x3_t b)11752 void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
11753   vst3q_f32(a, b);
11754 }
11755 
11756 // CHECK-LABEL: @test_vst3q_f64(
11757 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
11758 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
11759 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
11760 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
11761 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
11762 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
11763 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11764 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
11765 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11766 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
11767 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11768 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11769 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11770 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
11771 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11772 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11773 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11774 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
11775 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
11776 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11777 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11778 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11779 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11780 // CHECK:   call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
11781 // CHECK:   ret void
test_vst3q_f64(float64_t * a,float64x2x3_t b)11782 void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
11783   vst3q_f64(a, b);
11784 }
11785 
11786 // CHECK-LABEL: @test_vst3q_p8(
11787 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
11788 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
11789 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
11790 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11791 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
11792 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
11793 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11794 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11795 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11796 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11797 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11798 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11799 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11800 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11801 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11802 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11803 // CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11804 // CHECK:   ret void
11805 void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
11806   vst3q_p8(a, b);
11807 }
11808 
11809 // CHECK-LABEL: @test_vst3q_p16(
11810 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
11811 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
11812 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
11813 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11814 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
11815 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
11816 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11817 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11818 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11819 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11820 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11821 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11822 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11823 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11824 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11825 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11826 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11827 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11828 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11829 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11830 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11831 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11832 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11833 // CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11834 // CHECK:   ret void
11835 void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
11836   vst3q_p16(a, b);
11837 }
11838 
11839 // CHECK-LABEL: @test_vst3_u8(
11840 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
11841 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
11842 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
11843 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
11844 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
11845 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
11846 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11847 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11848 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
11849 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11850 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11851 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11852 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11853 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11854 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
11855 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
11856 // CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
11857 // CHECK:   ret void
11858 void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
11859   vst3_u8(a, b);
11860 }
11861 
11862 // CHECK-LABEL: @test_vst3_u16(
11863 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
11864 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
11865 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
11866 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
11867 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
11868 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
11869 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11870 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11871 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11872 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
11873 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11874 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11875 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11876 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11877 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11878 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11879 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11880 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
11881 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
11882 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11883 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11884 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11885 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11886 // CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
11887 // CHECK:   ret void
11888 void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
11889   vst3_u16(a, b);
11890 }
11891 
11892 // CHECK-LABEL: @test_vst3_u32(
11893 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
11894 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
11895 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
11896 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
11897 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
11898 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
11899 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11900 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
11901 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11902 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
11903 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11904 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11905 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11906 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11907 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11908 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11909 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11910 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
11911 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
11912 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
11913 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11914 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11915 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
11916 // CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
11917 // CHECK:   ret void
11918 void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
11919   vst3_u32(a, b);
11920 }
11921 
11922 // CHECK-LABEL: @test_vst3_u64(
11923 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
11924 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
11925 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
11926 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
11927 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
11928 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
11929 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11930 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11931 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11932 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
11933 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11934 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11935 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11936 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11937 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11938 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11939 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11940 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
11941 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
11942 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
11943 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11944 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11945 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
11946 // CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
11947 // CHECK:   ret void
11948 void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
11949   vst3_u64(a, b);
11950 }
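// Note for the 64-bit (d-register) variants: the struct copy shrinks to
// 24 bytes (3 x 8-byte vectors) at align 8, versus 48 bytes at align 16 for
// the q-register forms above, and the 8-bit element tests pass %a straight
// through since the destination pointer is already an i8*.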
11951 
11952 // CHECK-LABEL: @test_vst3_s8(
11953 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
11954 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
11955 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
11956 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
11957 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
11958 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
11959 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11960 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11961 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
11962 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11963 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11964 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11965 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11966 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11967 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
11968 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
11969 // CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
11970 // CHECK:   ret void
11971 void test_vst3_s8(int8_t *a, int8x8x3_t b) {
11972   vst3_s8(a, b);
11973 }
11974 
11975 // CHECK-LABEL: @test_vst3_s16(
11976 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
11977 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
11978 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
11979 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
11980 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
11981 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
11982 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11983 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11984 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11985 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
11986 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11987 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11988 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11989 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11990 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11991 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11992 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11993 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
11994 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
11995 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11996 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11997 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11998 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11999 // CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12000 // CHECK:   ret void
12001 void test_vst3_s16(int16_t *a, int16x4x3_t b) {
12002   vst3_s16(a, b);
12003 }
12004 
12005 // CHECK-LABEL: @test_vst3_s32(
12006 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
12007 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
12008 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
12009 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
12010 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
12011 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
12012 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12013 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12014 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12015 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
12016 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12017 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12018 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12019 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12020 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12021 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12022 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12023 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12024 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12025 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12026 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12027 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12028 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12029 // CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
12030 // CHECK:   ret void
12031 void test_vst3_s32(int32_t *a, int32x2x3_t b) {
12032   vst3_s32(a, b);
12033 }
12034 
12035 // CHECK-LABEL: @test_vst3_s64(
12036 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
12037 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
12038 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
12039 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
12040 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
12041 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
12042 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12043 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12044 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12045 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
12046 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12047 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12048 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12049 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12050 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12051 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12052 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12053 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12054 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12055 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12056 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12057 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12058 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12059 // CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
12060 // CHECK:   ret void
12061 void test_vst3_s64(int64_t *a, int64x1x3_t b) {
12062   vst3_s64(a, b);
12063 }
12064 
12065 // CHECK-LABEL: @test_vst3_f16(
12066 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
12067 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
12068 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
12069 // CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
12070 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
12071 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
12072 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12073 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12074 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12075 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
12076 // CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12077 // CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12078 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12079 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
12080 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
12081 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12082 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12083 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
12084 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
12085 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12086 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12087 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12088 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12089 // CHECK:   call void @llvm.aarch64.neon.st3.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i8* [[TMP2]])
12090 // CHECK:   ret void
12091 void test_vst3_f16(float16_t *a, float16x4x3_t b) {
12092   vst3_f16(a, b);
12093 }
12094 
12095 // CHECK-LABEL: @test_vst3_f32(
12096 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
12097 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
12098 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
12099 // CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
12100 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
12101 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
12102 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12103 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12104 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12105 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
12106 // CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
12107 // CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12108 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12109 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
12110 // CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
12111 // CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12112 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12113 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
12114 // CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
12115 // CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12116 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12117 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12118 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12119 // CHECK:   call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
12120 // CHECK:   ret void
12121 void test_vst3_f32(float32_t *a, float32x2x3_t b) {
12122   vst3_f32(a, b);
12123 }
12124 
12125 // CHECK-LABEL: @test_vst3_f64(
12126 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
12127 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
12128 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
12129 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
12130 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
12131 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
12132 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12133 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12134 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12135 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
12136 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
12137 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12138 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12139 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
12140 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
12141 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12142 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12143 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
12144 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
12145 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12146 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12147 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12148 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12149 // CHECK:   call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
12150 // CHECK:   ret void
12151 void test_vst3_f64(float64_t *a, float64x1x3_t b) {
12152   vst3_f64(a, b);
12153 }
12154 
12155 // CHECK-LABEL: @test_vst3_p8(
12156 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
12157 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
12158 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
12159 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
12160 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
12161 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
12162 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12163 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12164 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
12165 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12166 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12167 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12168 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12169 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12170 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12171 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12172 // CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
12173 // CHECK:   ret void
12174 void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
12175   vst3_p8(a, b);
12176 }
12177 
12178 // CHECK-LABEL: @test_vst3_p16(
12179 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
12180 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
12181 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
12182 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
12183 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
12184 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
12185 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12186 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12187 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12188 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
12189 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12190 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12191 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12192 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12193 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12194 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12195 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12196 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12197 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12198 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12199 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12200 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12201 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12202 // CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12203 // CHECK:   ret void
12204 void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
12205   vst3_p16(a, b);
12206 }
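// A minimal usage sketch (illustrative only, not FileCheck-verified): st3 is
// an interleaving store, so the three source vectors land in memory as
// v0[0], v1[0], v2[0], v0[1], v1[1], v2[1], ... The hypothetical helper below
// assumes dst points at 24 writable bytes; static inline keeps it out of the
// checked IR.
static inline void store_rgb_interleaved(uint8_t *dst, uint8x8_t r,
                                         uint8x8_t g, uint8x8_t b) {
  uint8x8x3_t rgb = {{r, g, b}}; // val[0]=r, val[1]=g, val[2]=b
  vst3_u8(dst, rgb);             // writes r0,g0,b0,r1,g1,b1,... (24 bytes)
}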
12207 
12208 // CHECK-LABEL: @test_vst4q_u8(
12209 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
12210 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
12211 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
12212 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12213 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
12214 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
12215 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12216 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12217 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12218 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12219 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12220 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12221 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12222 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12223 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12224 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12225 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12226 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12227 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12228 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12229 // CHECK:   ret void
12230 void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
12231   vst4q_u8(a, b);
12232 }
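// The st4 variants follow the same recipe with a fourth element: the q-form
// structs occupy 64 bytes (4 x 16) so the memcpy grows accordingly, and the
// intrinsic takes four data vectors ahead of the i8* destination.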
12233 
12234 // CHECK-LABEL: @test_vst4q_u16(
12235 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
12236 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
12237 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
12238 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12239 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
12240 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
12241 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12242 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12243 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12244 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12245 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12246 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12247 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12248 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12249 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12250 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12251 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12252 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12253 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12254 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12255 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12256 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12257 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12258 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12259 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12260 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12261 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12262 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12263 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12264 // CHECK:   ret void
12265 void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
12266   vst4q_u16(a, b);
12267 }
12268 
12269 // CHECK-LABEL: @test_vst4q_u32(
12270 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
12271 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
12272 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
12273 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12274 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
12275 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
12276 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12277 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12278 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12279 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12280 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12281 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12282 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12283 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12284 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12285 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12286 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12287 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12288 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12289 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12290 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12291 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12292 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12293 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12294 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12295 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12296 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12297 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12298 // CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12299 // CHECK:   ret void
12300 void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
12301   vst4q_u32(a, b);
12302 }
12303 
12304 // CHECK-LABEL: @test_vst4q_u64(
12305 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
12306 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
12307 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
12308 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12309 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
12310 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
12311 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12312 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12313 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12314 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12315 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12316 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12317 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12318 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12319 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12320 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12321 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12322 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12323 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12324 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12325 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12326 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12327 // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12328 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12329 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12330 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12331 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12332 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12333 // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12334 // CHECK:   ret void
12335 void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
12336   vst4q_u64(a, b);
12337 }
12338 
12339 // CHECK-LABEL: @test_vst4q_s8(
12340 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
12341 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
12342 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
12343 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12344 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
12345 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
12346 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12347 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12348 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12349 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12350 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12351 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12352 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12353 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12354 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12355 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12356 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12357 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12358 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12359 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12360 // CHECK:   ret void
12361 void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
12362   vst4q_s8(a, b);
12363 }
12364 
12365 // CHECK-LABEL: @test_vst4q_s16(
12366 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
12367 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
12368 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
12369 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12370 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
12371 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
12372 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12373 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12374 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12375 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12376 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12377 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12378 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12379 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12380 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12381 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12382 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12383 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12384 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12385 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12386 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12387 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12388 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12389 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12390 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12391 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12392 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12393 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12394 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12395 // CHECK:   ret void
12396 void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
12397   vst4q_s16(a, b);
12398 }
12399 
12400 // CHECK-LABEL: @test_vst4q_s32(
12401 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
12402 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
12403 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
12404 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12405 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
12406 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
12407 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12408 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12409 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12410 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12411 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12412 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12413 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12414 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12415 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12416 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12417 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12418 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12419 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12420 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12421 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12422 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12423 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12424 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12425 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12426 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12427 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12428 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12429 // CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12430 // CHECK:   ret void
12431 void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
12432   vst4q_s32(a, b);
12433 }
12434 
12435 // CHECK-LABEL: @test_vst4q_s64(
12436 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
12437 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
12438 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
12439 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12440 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
12441 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
12442 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12443 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12444 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12445 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12446 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12447 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12448 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12449 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12450 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12451 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12452 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12453 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12454 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12455 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12456 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12457 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12458 // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12459 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12460 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12461 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12462 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12463 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12464 // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12465 // CHECK:   ret void
12466 void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
12467   vst4q_s64(a, b);
12468 }
12469 
12470 // CHECK-LABEL: @test_vst4q_f16(
12471 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
12472 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
12473 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
12474 // CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
12475 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
12476 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
12477 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12478 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12479 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12480 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
12481 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
12482 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
12483 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12484 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
12485 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
12486 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
12487 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12488 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
12489 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
12490 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
12491 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12492 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
12493 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
12494 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
12495 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
12496 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
12497 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
12498 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
12499 // CHECK:   call void @llvm.aarch64.neon.st4.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i8* [[TMP2]])
12500 // CHECK:   ret void
12501 void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
12502   vst4q_f16(a, b);
12503 }
12504 
12505 // CHECK-LABEL: @test_vst4q_f32(
12506 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
12507 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
12508 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
12509 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
12510 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
12511 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
12512 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12513 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12514 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12515 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
12516 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
12517 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
12518 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12519 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
12520 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12521 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12522 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12523 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
12524 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
12525 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
12526 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12527 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
12528 // CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
12529 // CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
12530 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12531 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12532 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
12533 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
12534 // CHECK:   call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
12535 // CHECK:   ret void
12536 void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
12537   vst4q_f32(a, b);
12538 }
12539 
12540 // CHECK-LABEL: @test_vst4q_f64(
12541 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
12542 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
12543 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
12544 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
12545 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
12546 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
12547 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12548 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12549 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12550 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
12551 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12552 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12553 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12554 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
12555 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12556 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12557 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12558 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
12559 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
12560 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12561 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12562 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
12563 // CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
12564 // CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
12565 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12566 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12567 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12568 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
12569 // CHECK:   call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
12570 // CHECK:   ret void
12571 void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
12572   vst4q_f64(a, b);
12573 }
12574 
12575 // CHECK-LABEL: @test_vst4q_p8(
12576 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
12577 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
12578 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
12579 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12580 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
12581 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
12582 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12583 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12584 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12585 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12586 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12587 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12588 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12589 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12590 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12591 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12592 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12593 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12594 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12595 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12596 // CHECK:   ret void
12597 void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
12598   vst4q_p8(a, b);
12599 }
12600 
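// Note that for the 8-bit element types (s8/u8/p8) no <16 x i8> bitcasts are
// emitted: the lanes are already byte vectors, and the pointer %a is passed
// to the intrinsic directly, without the i8* bitcast the wider types need.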
12601 // CHECK-LABEL: @test_vst4q_p16(
12602 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
12603 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
12604 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
12605 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12606 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
12607 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
12608 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12609 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12610 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12611 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12612 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12613 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12614 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12615 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12616 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12617 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12618 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12619 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12620 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12621 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12622 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12623 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12624 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12625 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12626 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12627 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12628 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12629 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12630 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12631 // CHECK:   ret void
12632 void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
12633   vst4q_p16(a, b);
12634 }
12635 
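// The 64-bit (d-register) vst4 variants below mirror the q-register tests
// with half-sized data: 8-byte alignment, a 32-byte @llvm.memcpy of the
// aggregate, and lane round-trips through <8 x i8> instead of <16 x i8>.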
12636 // CHECK-LABEL: @test_vst4_u8(
12637 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
12638 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
12639 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
12640 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12641 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
12642 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
12643 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12644 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12645 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12646 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12647 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12648 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12649 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12650 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12651 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12652 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12653 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12654 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12655 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12656 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12657 // CHECK:   ret void
12658 void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
12659   vst4_u8(a, b);
12660 }
12661 
12662 // CHECK-LABEL: @test_vst4_u16(
12663 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
12664 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
12665 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
12666 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12667 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
12668 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
12669 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12670 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12671 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12672 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12673 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12674 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12675 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12676 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12677 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12678 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12679 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12680 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12681 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12682 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12683 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12684 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12685 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12686 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12687 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12688 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12689 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12690 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12691 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12692 // CHECK:   ret void
12693 void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
12694   vst4_u16(a, b);
12695 }
12696 
12697 // CHECK-LABEL: @test_vst4_u32(
12698 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
12699 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
12700 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
12701 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12702 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
12703 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
12704 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12705 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12706 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12707 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12708 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12709 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12710 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12711 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12712 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12713 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12714 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12715 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12716 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12717 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12718 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12719 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12720 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12721 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12722 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12723 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12724 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12725 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12726 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12727 // CHECK:   ret void
12728 void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
12729   vst4_u32(a, b);
12730 }
12731 
12732 // CHECK-LABEL: @test_vst4_u64(
12733 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
12734 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
12735 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
12736 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12737 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
12738 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
12739 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12740 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12741 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12742 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12743 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12744 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12745 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12746 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12747 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12748 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12749 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12750 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12751 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12752 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12753 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12754 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12755 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12756 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12757 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12758 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12759 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12760 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12761 // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12762 // CHECK:   ret void
12763 void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
12764   vst4_u64(a, b);
12765 }
12766 
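// The signed tests below expect IR identical to the unsigned tests above:
// LLVM vector types are signless, so only the C-level prototypes differ.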
12767 // CHECK-LABEL: @test_vst4_s8(
12768 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
12769 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
12770 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
12771 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12772 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
12773 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
12774 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12775 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12776 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12777 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12778 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12779 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12780 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12781 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12782 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12783 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12784 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12785 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12786 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12787 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12788 // CHECK:   ret void
12789 void test_vst4_s8(int8_t *a, int8x8x4_t b) {
12790   vst4_s8(a, b);
12791 }
12792 
12793 // CHECK-LABEL: @test_vst4_s16(
12794 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
12795 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
12796 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
12797 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12798 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
12799 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
12800 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12801 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12802 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12803 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12804 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12805 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12806 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12807 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12808 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12809 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12810 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12811 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12812 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12813 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12814 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12815 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12816 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12817 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12818 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12819 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12820 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12821 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12822 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12823 // CHECK:   ret void
12824 void test_vst4_s16(int16_t *a, int16x4x4_t b) {
12825   vst4_s16(a, b);
12826 }
12827 
12828 // CHECK-LABEL: @test_vst4_s32(
12829 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
12830 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
12831 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
12832 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12833 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
12834 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
12835 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12836 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12837 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12838 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12839 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12840 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12841 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12842 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12843 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12844 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12845 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12846 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12847 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12848 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12849 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12850 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12851 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12852 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12853 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12854 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12855 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12856 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12857 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12858 // CHECK:   ret void
12859 void test_vst4_s32(int32_t *a, int32x2x4_t b) {
12860   vst4_s32(a, b);
12861 }
12862 
12863 // CHECK-LABEL: @test_vst4_s64(
12864 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
12865 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
12866 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
12867 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12868 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
12869 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
12870 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12871 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12872 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12873 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12874 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12875 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12876 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12877 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12878 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12879 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12880 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12881 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12882 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12883 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12884 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12885 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12886 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12887 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12888 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12889 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12890 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12891 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12892 // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12893 // CHECK:   ret void
12894 void test_vst4_s64(int64_t *a, int64x1x4_t b) {
12895   vst4_s64(a, b);
12896 }
12897 
12898 // CHECK-LABEL: @test_vst4_f16(
12899 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
12900 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
12901 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
12902 // CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
12903 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
12904 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
12905 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12906 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12907 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12908 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
12909 // CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12910 // CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12911 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12912 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
12913 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
12914 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12915 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12916 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
12917 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
12918 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12919 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12920 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
12921 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
12922 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
12923 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12924 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12925 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12926 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
12927 // CHECK:   call void @llvm.aarch64.neon.st4.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i8* [[TMP2]])
12928 // CHECK:   ret void
12929 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
12930   vst4_f16(a, b);
12931 }
12932 
12933 // CHECK-LABEL: @test_vst4_f32(
12934 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
12935 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
12936 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
12937 // CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
12938 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
12939 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
12940 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12941 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12942 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12943 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
12944 // CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
12945 // CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12946 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12947 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
12948 // CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
12949 // CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12950 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12951 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
12952 // CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
12953 // CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12954 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12955 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
12956 // CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
12957 // CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
12958 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12959 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12960 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12961 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
12962 // CHECK:   call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
12963 // CHECK:   ret void
12964 void test_vst4_f32(float32_t *a, float32x2x4_t b) {
12965   vst4_f32(a, b);
12966 }
12967 
12968 // CHECK-LABEL: @test_vst4_f64(
12969 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
12970 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
12971 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
12972 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
12973 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
12974 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
12975 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12976 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12977 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12978 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
12979 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
12980 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12981 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12982 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
12983 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
12984 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12985 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12986 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
12987 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
12988 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12989 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12990 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
12991 // CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
12992 // CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
12993 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12994 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12995 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12996 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
12997 // CHECK:   call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
12998 // CHECK:   ret void
12999 void test_vst4_f64(float64_t *a, float64x1x4_t b) {
13000   vst4_f64(a, b);
13001 }
13002 
13003 // CHECK-LABEL: @test_vst4_p8(
13004 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
13005 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
13006 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
13007 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13008 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
13009 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
13010 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13011 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13012 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13013 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13014 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13015 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13016 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13017 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13018 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13019 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13020 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13021 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13022 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13023 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13024 // CHECK:   ret void
13025 void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
13026   vst4_p8(a, b);
13027 }
13028 
13029 // CHECK-LABEL: @test_vst4_p16(
13030 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
13031 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
13032 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
13033 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13034 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
13035 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
13036 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13037 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13038 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13039 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13040 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13041 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13042 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13043 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13044 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13045 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13046 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13047 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13048 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13049 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13050 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13051 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13052 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13053 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13054 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13055 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13056 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13057 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13058 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13059 // CHECK:   ret void
13060 void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
13061   vst4_p16(a, b);
13062 }
13063 
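// The vld1*_xN tests check the multi-vector LD1 loads. Unlike vld2/vld3/vld4,
// these load N vectors of consecutive, non-interleaved elements. The expected
// IR pattern is: call @llvm.aarch64.neon.ld1xN, store the returned literal
// struct into a local temporary, @llvm.memcpy it into the return temporary,
// and return the aggregate by value.
// Illustrative semantics only (not compiled as part of this test; p is
// hypothetical):
//   float64x2x2_t v = vld1q_f64_x2(p);
//   // v.val[0] = { p[0], p[1] }, v.val[1] = { p[2], p[3] }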
13064 // CHECK-LABEL: @test_vld1q_f64_x2(
13065 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
13066 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
13067 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13068 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13069 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13070 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
13071 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
13072 // CHECK:   store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
13073 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
13074 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13075 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13076 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
13077 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
13078 float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
13079   return vld1q_f64_x2(a);
13080 }
13081 
13082 // CHECK-LABEL: @test_vld1q_p64_x2(
13083 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
13084 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
13085 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13086 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13087 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13088 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
13089 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
13090 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
13091 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
13092 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13093 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13094 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
13095 // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
13096 poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
13097   return vld1q_p64_x2(a);
13098 }
13099 
13100 // CHECK-LABEL: @test_vld1_f64_x2(
13101 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
13102 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
13103 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13104 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13105 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13106 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
13107 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
13108 // CHECK:   store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
13109 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
13110 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13111 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13112 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
13113 // CHECK:   ret %struct.float64x1x2_t [[TMP6]]
13114 float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
13115   return vld1_f64_x2(a);
13116 }
13117 
13118 // CHECK-LABEL: @test_vld1_p64_x2(
13119 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
13120 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
13121 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13122 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13123 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13124 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
13125 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
13126 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
13127 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
13128 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13129 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13130 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
13131 // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
13132 poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
13133   return vld1_p64_x2(a);
13134 }
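// The _x3 variants below load three consecutive vectors; the only changes in
// the IR are the three-element literal struct and the copy size (48 bytes for
// q-register results, 24 bytes for d-register results).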
13135 
13136 // CHECK-LABEL: @test_vld1q_f64_x3(
13137 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
13138 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
13139 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13140 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13141 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13142 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
13143 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
13144 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13145 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
13146 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13147 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13148 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
13149 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
13150 float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
13151   return vld1q_f64_x3(a);
13152 }
13153 
13154 // CHECK-LABEL: @test_vld1q_p64_x3(
13155 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
13156 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
13157 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13158 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13159 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13160 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
13161 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
13162 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13163 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
13164 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13165 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13166 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
13167 // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
13168 poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
13169   return vld1q_p64_x3(a);
13170 }
13171 
13172 // CHECK-LABEL: @test_vld1_f64_x3(
13173 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
13174 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
13175 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13176 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13177 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13178 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
13179 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
13180 // CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13181 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
13182 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13183 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13184 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
13185 // CHECK:   ret %struct.float64x1x3_t [[TMP6]]
test_vld1_f64_x3(float64_t const * a)13186 float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
13187   return vld1_f64_x3(a);
13188 }
13189 
13190 // CHECK-LABEL: @test_vld1_p64_x3(
13191 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
13192 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
13193 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13194 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13195 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13196 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
13197 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
13198 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13199 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
13200 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13201 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13202 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
13203 // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
test_vld1_p64_x3(poly64_t const * a)13204 poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
13205   return vld1_p64_x3(a);
13206 }
13207 
13208 // CHECK-LABEL: @test_vld1q_f64_x4(
13209 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
13210 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
13211 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13212 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13213 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13214 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
13215 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
13216 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13217 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
13218 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13219 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13220 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
13221 // CHECK:   ret %struct.float64x2x4_t [[TMP6]]
test_vld1q_f64_x4(float64_t const * a)13222 float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
13223   return vld1q_f64_x4(a);
13224 }
13225 
13226 // CHECK-LABEL: @test_vld1q_p64_x4(
13227 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
13228 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
13229 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13230 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13231 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13232 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
13233 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
13234 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13235 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
13236 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13237 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13238 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
13239 // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
test_vld1q_p64_x4(poly64_t const * a)13240 poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
13241   return vld1q_p64_x4(a);
13242 }
13243 
13244 // CHECK-LABEL: @test_vld1_f64_x4(
13245 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
13246 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
13247 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13248 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13249 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13250 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
13251 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
13252 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13253 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
13254 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13255 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13256 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
13257 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
test_vld1_f64_x4(float64_t const * a)13258 float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
13259   return vld1_f64_x4(a);
13260 }
13261 
13262 // CHECK-LABEL: @test_vld1_p64_x4(
13263 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
13264 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
13265 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13266 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13267 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13268 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
13269 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
13270 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13271 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
13272 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13273 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13274 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
13275 // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
test_vld1_p64_x4(poly64_t const * a)13276 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
13277   return vld1_p64_x4(a);
13278 }
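
// The vld1*_xN intrinsics above load N consecutive vectors from a single
// base pointer and return them as a struct of N vectors (.val[0..N-1]).
// A minimal usage sketch, illustrative only and not CHECK-verified; the
// helper name is hypothetical:
__attribute__((unused)) static float64_t example_sum6_f64(const float64_t *buf) {
  // Loads buf[0..5] into three <2 x double> registers in one call.
  float64x2x3_t v = vld1q_f64_x3(buf);
  float64x2_t sum = vaddq_f64(vaddq_f64(v.val[0], v.val[1]), v.val[2]);
  return vgetq_lane_f64(sum, 0) + vgetq_lane_f64(sum, 1);
}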

// CHECK-LABEL: @test_vst1q_f64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
// CHECK:   ret void
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}

// CHECK-LABEL: @test_vst1q_p64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}

// CHECK-LABEL: @test_vst1_f64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
// CHECK:   ret void
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}

// CHECK-LABEL: @test_vst1_p64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}

// CHECK-LABEL: @test_vst1q_f64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
// CHECK:   ret void
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}

// CHECK-LABEL: @test_vst1q_p64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}

// CHECK-LABEL: @test_vst1_f64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
// CHECK:   ret void
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}

// CHECK-LABEL: @test_vst1_p64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}

// CHECK-LABEL: @test_vst1q_f64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
// CHECK:   ret void
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
  vst1q_f64_x4(a, b);
}

// CHECK-LABEL: @test_vst1q_p64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}

// CHECK-LABEL: @test_vst1_f64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
// CHECK:   ret void
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}

// CHECK-LABEL: @test_vst1_p64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
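
// The vst1*_xN intrinsics are the mirror image of the loads above: they
// store the N vectors of an NxM struct to consecutive memory. An
// illustrative round trip, not CHECK-verified; the helper name is
// hypothetical:
__attribute__((unused)) static void example_copy4_f64(float64_t *dst,
                                                      const float64_t *src) {
  float64x2x2_t v = vld1q_f64_x2(src); // reads src[0..3]
  vst1q_f64_x2(dst, v);                // writes dst[0..3]
}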

// CHECK-LABEL: @test_vceqd_s64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}

// CHECK-LABEL: @test_vceqd_u64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (int64_t)vceqd_u64(a, b);
}

// CHECK-LABEL: @test_vceqzd_s64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZ_I]]
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}

// CHECK-LABEL: @test_vceqzd_u64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK:   [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZD_I]]
int64_t test_vceqzd_u64(int64_t a) {
  return (int64_t)vceqzd_u64(a);
}

// CHECK-LABEL: @test_vcged_s64(
// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}

// CHECK-LABEL: @test_vcged_u64(
// CHECK:   [[TMP0:%.*]] = icmp uge i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}

// CHECK-LABEL: @test_vcgezd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, 0
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGEZ_I]]
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}

// CHECK-LABEL: @test_vcgtd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}

// CHECK-LABEL: @test_vcgtd_u64(
// CHECK:   [[TMP0:%.*]] = icmp ugt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}

// CHECK-LABEL: @test_vcgtzd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, 0
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGTZ_I]]
int64_t test_vcgtzd_s64(int64_t a) {
  return (int64_t)vcgtzd_s64(a);
}

// CHECK-LABEL: @test_vcled_s64(
// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}

// CHECK-LABEL: @test_vcled_u64(
// CHECK:   [[TMP0:%.*]] = icmp ule i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}

// CHECK-LABEL: @test_vclezd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, 0
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLEZ_I]]
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}

// CHECK-LABEL: @test_vcltd_s64(
// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}

// CHECK-LABEL: @test_vcltd_u64(
// CHECK:   [[TMP0:%.*]] = icmp ult i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}

// CHECK-LABEL: @test_vcltzd_s64(
// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, 0
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLTZ_I]]
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}

// CHECK-LABEL: @test_vtstd_s64(
// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK:   ret i64 [[VTSTD_I]]
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}

// CHECK-LABEL: @test_vtstd_u64(
// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK:   ret i64 [[VTSTD_I]]
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}
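
// As the `sext i1` in the checked IR shows, the scalar compare intrinsics
// return an all-ones mask on true and 0 on false, which composes with
// bitwise operations for branchless selection. Illustrative sketch, not
// CHECK-verified; the helper name is hypothetical:
__attribute__((unused)) static int64_t example_select_eq(int64_t a, int64_t b,
                                                         int64_t x, int64_t y) {
  uint64_t m = vceqd_s64(a, b); // 0xffffffffffffffff if a == b, else 0
  // Selects x when a == b and y otherwise, without a branch.
  return (int64_t)(((uint64_t)x & m) | ((uint64_t)y & ~m));
}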

// CHECK-LABEL: @test_vabsd_s64(
// CHECK:   [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
// CHECK:   ret i64 [[VABSD_S64_I]]
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}

// CHECK-LABEL: @test_vqabsb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}

// CHECK-LABEL: @test_vqabsh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}

// CHECK-LABEL: @test_vqabss_s32(
// CHECK:   [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
// CHECK:   ret i32 [[VQABSS_S32_I]]
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}

// CHECK-LABEL: @test_vqabsd_s64(
// CHECK:   [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
// CHECK:   ret i64 [[VQABSD_S64_I]]
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}

// CHECK-LABEL: @test_vnegd_s64(
// CHECK:   [[VNEGD_I:%.*]] = sub i64 0, %a
// CHECK:   ret i64 [[VNEGD_I]]
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}

// CHECK-LABEL: @test_vqnegb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}

// CHECK-LABEL: @test_vqnegh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}

// CHECK-LABEL: @test_vqnegs_s32(
// CHECK:   [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
// CHECK:   ret i32 [[VQNEGS_S32_I]]
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}

// CHECK-LABEL: @test_vqnegd_s64(
// CHECK:   [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
// CHECK:   ret i64 [[VQNEGD_S64_I]]
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}

// CHECK-LABEL: @test_vuqaddb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}

// CHECK-LABEL: @test_vuqadds_s32(
// CHECK:   [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VUQADDS_S32_I]]
int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddd_s64(
// CHECK:   [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VUQADDD_S64_I]]
int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}

// CHECK-LABEL: @test_vsqaddb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}

// CHECK-LABEL: @test_vsqadds_u32(
// CHECK:   [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VSQADDS_U32_I]]
uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddd_u64(
// CHECK:   [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSQADDD_U64_I]]
uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}
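
// vuqadd* saturating-adds an unsigned operand into a signed result (SUQADD)
// and vsqadd* adds a signed operand into an unsigned result (USQADD),
// clamping to the destination type's range. Illustrative value, not
// CHECK-verified; the helper name is hypothetical:
__attribute__((unused)) static int8_t example_suqadd_clamp(void) {
  // 100 + 100 exceeds INT8_MAX, so the result saturates to 127 instead of
  // wrapping to -56.
  return vuqaddb_s8((int8_t)100, (uint8_t)100);
}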

// CHECK-LABEL: @test_vqdmlalh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
// CHECK:   ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlalh_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlals_s32(
// CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
// CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
// CHECK:   ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlals_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlslh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
// CHECK:   ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlslh_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsls_s32(
// CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
// CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
// CHECK:   ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlsls_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmullh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
// CHECK:   ret i32 [[TMP2]]
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
  return (int32_t)vqdmullh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulls_s32(
// CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
// CHECK:   ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
  return (int64_t)vqdmulls_s32(a, b);
}
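
// vqdmull* computes the saturating doubling multiply-long 2*a*b in the
// double-width type; as the IR above shows, the 16-bit form is lowered
// through one-lane vectors while the 32-bit form uses a dedicated scalar
// intrinsic. Illustrative value, not CHECK-verified; the helper name is
// hypothetical:
__attribute__((unused)) static int32_t example_sqdmull_h(void) {
  // 2 * 1000 * 1000 = 2000000 fits in int32, so no saturation occurs here.
  return vqdmullh_s16((int16_t)1000, (int16_t)1000);
}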

// CHECK-LABEL: @test_vqmovunh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}

// CHECK-LABEL: @test_vqmovuns_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqmovuns_s32(int32_t a) {
  return (int16_t)vqmovuns_s32(a);
}

// CHECK-LABEL: @test_vqmovund_s64(
// CHECK:   [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
// CHECK:   ret i32 [[VQMOVUND_S64_I]]
int32_t test_vqmovund_s64(int64_t a) {
  return (int32_t)vqmovund_s64(a);
}

// CHECK-LABEL: @test_vqmovnh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqmovnh_s16(int16_t a) {
  return (int8_t)vqmovnh_s16(a);
}

// CHECK-LABEL: @test_vqmovns_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqmovns_s32(int32_t a) {
  return (int16_t)vqmovns_s32(a);
}

// CHECK-LABEL: @test_vqmovnd_s64(
// CHECK:   [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
// CHECK:   ret i32 [[VQMOVND_S64_I]]
int32_t test_vqmovnd_s64(int64_t a) {
  return (int32_t)vqmovnd_s64(a);
}

// CHECK-LABEL: @test_vqmovnh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqmovnh_u16(int16_t a) {
  return (int8_t)vqmovnh_u16(a);
}

// CHECK-LABEL: @test_vqmovns_u32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqmovns_u32(int32_t a) {
  return (int16_t)vqmovns_u32(a);
}

// CHECK-LABEL: @test_vqmovnd_u64(
// CHECK:   [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
// CHECK:   ret i32 [[VQMOVND_U64_I]]
int32_t test_vqmovnd_u64(int64_t a) {
  return (int32_t)vqmovnd_u64(a);
}
14073 
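// Scalar floating-point compares need no intrinsic at all: they lower to a
// plain fcmp followed by a sign extension of the i1 result, so the value
// returned is an all-ones mask when the compare holds and 0 otherwise.
// Illustrative use (not part of the checked output):
//   uint32_t m = vcges_f32(x, 0.0f);  // 0xFFFFFFFF iff x >= +0.0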
// CHECK-LABEL: @test_vceqs_f32(
// CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: @test_vceqd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// CHECK-LABEL: @test_vceqzs_f32(
// CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: @test_vceqzd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}

// CHECK-LABEL: @test_vcges_f32(
// CHECK:   [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: @test_vcged_f64(
// CHECK:   [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: @test_vcgezs_f32(
// CHECK:   [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: @test_vcgezd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}

// CHECK-LABEL: @test_vcgts_f32(
// CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: @test_vcgtd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: @test_vcgtzs_f32(
// CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: @test_vcgtzd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}

// CHECK-LABEL: @test_vcles_f32(
// CHECK:   [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: @test_vcled_f64(
// CHECK:   [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: @test_vclezs_f32(
// CHECK:   [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: @test_vclezd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}

// CHECK-LABEL: @test_vclts_f32(
// CHECK:   [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: @test_vcltd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: @test_vcltzs_f32(
// CHECK:   [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: @test_vcltzd_f64(
// CHECK:   [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}

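// Absolute compares map to the facge/facgt intrinsics; the "absolute
// less-than-or-equal/less-than" forms below reuse the same intrinsics with
// the operands swapped, as the (%b, %a) argument order in the calls shows.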
// CHECK-LABEL: @test_vcages_f32(
// CHECK:   [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
// CHECK:   ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: @test_vcaged_f64(
// CHECK:   [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
// CHECK:   ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: @test_vcagts_f32(
// CHECK:   [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
// CHECK:   ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: @test_vcagtd_f64(
// CHECK:   [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
// CHECK:   ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}

// CHECK-LABEL: @test_vcales_f32(
// CHECK:   [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
// CHECK:   ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: @test_vcaled_f64(
// CHECK:   [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
// CHECK:   ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// CHECK-LABEL: @test_vcalts_f32(
// CHECK:   [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
// CHECK:   ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: @test_vcaltd_f64(
// CHECK:   [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
// CHECK:   ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}

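// Immediate right shifts lower to plain ashr/lshr. An unsigned scalar shift
// by the full width (64) constant-folds to 0, and the accumulating form
// with the same count folds away to just the accumulator; both foldings are
// pinned down below (test_vshrd_n_u64 and test_vsrad_n_u64_2).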
// CHECK-LABEL: @test_vshrd_n_s64(
// CHECK:   [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK:   ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrd_n_u64(
// CHECK:   ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {
  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: @test_vshrd_n_u64_2(
// CHECK:   ret i64 0
uint64_t test_vshrd_n_u64_2() {
  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

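// Rounding right shifts have no IR node of their own: a rounding shift
// right by n is emitted as srshl/urshl (rounding shift left) by -n.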
// CHECK-LABEL: @test_vrshrd_n_s64(
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrd_n_u64(
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vsrad_n_s64(
// CHECK:   [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrad_n_u64(
// CHECK:   [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsrad_n_u64_2(
// CHECK:   ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: @test_vsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vrsrad_n_s64(
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsrad_n_u64(
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vshld_n_s64(
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK:   ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshld_n_u64(
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK:   ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

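// Saturating left shifts keep their intrinsic even when the count is 0 (the
// vector tests below pass a zeroinitializer shift), since plain shl carries
// no saturation semantics. 8- and 16-bit scalars round-trip through lane 0
// of a vector call with the count in lane 0 and the other lanes undef;
// 32- and 64-bit scalars use the i32/i64 scalar intrinsics directly.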
// CHECK-LABEL: @test_vqshlb_n_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_s32(
// CHECK:   [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_s64(
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}

// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshlb_n_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_u32(
// CHECK:   [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_u64(
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshlub_n_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshlub_n_s8(int8_t a) {
  return (uint8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshluh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshluh_n_s16(int16_t a) {
  return (uint16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshlus_n_s32(
// CHECK:   [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLUS_N_S32]]
uint32_t test_vqshlus_n_s32(int32_t a) {
  return (uint32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshlud_n_s64(
// CHECK:   [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHLU_N]]
uint64_t test_vqshlud_n_s64(int64_t a) {
  return (uint64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

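// Shift-insert on scalars bitcasts each i64 operand to <1 x i64>, calls the
// vsri/vsli vector intrinsic with the count as an i32 immediate, and casts
// the result back to i64.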
// CHECK-LABEL: @test_vsrid_n_s64(
// CHECK:   [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK:   [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK:   ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrid_n_u64(
// CHECK:   [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK:   [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK:   ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_s64(
// CHECK:   [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK:   [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK:   ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_u64(
// CHECK:   [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK:   [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK:   ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

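// Saturating narrowing shifts mirror the vqmovn pattern above: i16 and i32
// sources use a lane-0 vector intrinsic plus extractelement, i64 sources a
// dedicated i32-returning scalar intrinsic; the shift count is always the
// trailing i32 immediate.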
// CHECK-LABEL: @test_vqshrnh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_s64(
// CHECK:   [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqshrnh_n_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_u32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_u64(
// CHECK:   [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}

// CHECK-LABEL: @test_vqrshrnh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_s64(
// CHECK:   [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqrshrnh_n_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_u32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_u64(
// CHECK:   [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}

// CHECK-LABEL: @test_vqshrunh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshrunh_n_s16(int16_t a) {
  return (uint8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshruns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshruns_n_s32(int32_t a) {
  return (uint16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrund_n_s64(
// CHECK:   [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRUND_N_S64]]
uint32_t test_vqshrund_n_s64(int64_t a) {
  return (uint32_t)vqshrund_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqrshrunh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqrshrunh_n_s16(int16_t a) {
  return (uint8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshruns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqrshruns_n_s32(int32_t a) {
  return (uint16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrund_n_s64(
// CHECK:   [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRUND_N_S64]]
uint32_t test_vqrshrund_n_s64(int64_t a) {
  return (uint32_t)vqrshrund_n_s64(a, 32);
}

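// Fixed-point conversions carry the number of fractional bits as the
// trailing i32 immediate: vcvtfxs2fp/vcvtfxu2fp convert from fixed-point
// integers to float, vcvtfp2fxs/vcvtfp2fxu back again. The tests below
// exercise both ends of the immediate range (1 and the full bit width).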
// CHECK-LABEL: @test_vcvts_n_f32_s32(
// CHECK:   [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK:   ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_f64_s64(
// CHECK:   [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK:   ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_f32_u32(
// CHECK:   [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK:   ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_f64_u64(
// CHECK:   [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK:   ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}

// CHECK-LABEL: @test_vcvts_n_s32_f32(
// CHECK:   [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK:   ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_s64_f64(
// CHECK:   [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK:   ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_u32_f32(
// CHECK:   [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK:   ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_u64_f64(
// CHECK:   [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK:   ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}

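// vreinterpret is a pure bit reinterpretation: it lowers to a single bitcast
// when the element types differ, and to nothing at all (the IR just returns
// %a) when only the signedness or polynomial interpretation changes.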
15009 // CHECK-LABEL: @test_vreinterpret_s8_s16(
15010 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15011 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s16(int16x4_t a)15012 int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
15013   return vreinterpret_s8_s16(a);
15014 }
15015 
15016 // CHECK-LABEL: @test_vreinterpret_s8_s32(
15017 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15018 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s32(int32x2_t a)15019 int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
15020   return vreinterpret_s8_s32(a);
15021 }
15022 
15023 // CHECK-LABEL: @test_vreinterpret_s8_s64(
15024 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15025 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s64(int64x1_t a)15026 int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
15027   return vreinterpret_s8_s64(a);
15028 }
15029 
15030 // CHECK-LABEL: @test_vreinterpret_s8_u8(
15031 // CHECK:   ret <8 x i8> %a
test_vreinterpret_s8_u8(uint8x8_t a)15032 int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
15033   return vreinterpret_s8_u8(a);
15034 }
15035 
15036 // CHECK-LABEL: @test_vreinterpret_s8_u16(
15037 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15038 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u16(uint16x4_t a)15039 int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
15040   return vreinterpret_s8_u16(a);
15041 }
15042 
15043 // CHECK-LABEL: @test_vreinterpret_s8_u32(
15044 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15045 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u32(uint32x2_t a)15046 int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
15047   return vreinterpret_s8_u32(a);
15048 }
15049 
15050 // CHECK-LABEL: @test_vreinterpret_s8_u64(
15051 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15052 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u64(uint64x1_t a)15053 int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
15054   return vreinterpret_s8_u64(a);
15055 }
15056 
15057 // CHECK-LABEL: @test_vreinterpret_s8_f16(
15058 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
15059 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_f16(float16x4_t a)15060 int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
15061   return vreinterpret_s8_f16(a);
15062 }
15063 
15064 // CHECK-LABEL: @test_vreinterpret_s8_f32(
15065 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
15066 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_f32(float32x2_t a)15067 int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
15068   return vreinterpret_s8_f32(a);
15069 }
15070 
15071 // CHECK-LABEL: @test_vreinterpret_s8_f64(
15072 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
15073 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_f64(float64x1_t a)15074 int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
15075   return vreinterpret_s8_f64(a);
15076 }
15077 
15078 // CHECK-LABEL: @test_vreinterpret_s8_p8(
15079 // CHECK:   ret <8 x i8> %a
test_vreinterpret_s8_p8(poly8x8_t a)15080 int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
15081   return vreinterpret_s8_p8(a);
15082 }
15083 
15084 // CHECK-LABEL: @test_vreinterpret_s8_p16(
15085 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15086 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_p16(poly16x4_t a)15087 int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
15088   return vreinterpret_s8_p16(a);
15089 }
15090 
15091 // CHECK-LABEL: @test_vreinterpret_s8_p64(
15092 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15093 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_p64(poly64x1_t a)15094 int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
15095   return vreinterpret_s8_p64(a);
15096 }
15097 
15098 // CHECK-LABEL: @test_vreinterpret_s16_s8(
15099 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15100 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s8(int8x8_t a)15101 int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
15102   return vreinterpret_s16_s8(a);
15103 }
15104 
15105 // CHECK-LABEL: @test_vreinterpret_s16_s32(
15106 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
15107 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s32(int32x2_t a)15108 int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
15109   return vreinterpret_s16_s32(a);
15110 }
15111 
15112 // CHECK-LABEL: @test_vreinterpret_s16_s64(
15113 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15114 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s64(int64x1_t a)15115 int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
15116   return vreinterpret_s16_s64(a);
15117 }
15118 
// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK:   ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u64(
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p64(
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s8(
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p8(
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p16(
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s32(
// CHECK:   ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s64(
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p64(
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}
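
// Reinterprets targeting floating-point element types are still pure bit
// pattern copies; no integer-to-float value conversion is performed (that
// is what the vcvt_* intrinsics are for), so the IR is again a single
// bitcast.
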
// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s64(
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u64(
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}
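
// The vreinterpretq_* variants perform the same bit-level reinterpretation
// on the 128-bit (q-register) vector types such as int8x16_t and
// float64x2_t, and lower identically: an identity return when the element
// layout matches, a single bitcast otherwise.
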
16263 // CHECK-LABEL: @test_vreinterpretq_s8_s16(
16264 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16265 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s16(int16x8_t a)16266 int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
16267   return vreinterpretq_s8_s16(a);
16268 }
16269 
16270 // CHECK-LABEL: @test_vreinterpretq_s8_s32(
16271 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16272 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s32(int32x4_t a)16273 int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
16274   return vreinterpretq_s8_s32(a);
16275 }
16276 
16277 // CHECK-LABEL: @test_vreinterpretq_s8_s64(
16278 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16279 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s64(int64x2_t a)16280 int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
16281   return vreinterpretq_s8_s64(a);
16282 }
16283 
16284 // CHECK-LABEL: @test_vreinterpretq_s8_u8(
16285 // CHECK:   ret <16 x i8> %a
test_vreinterpretq_s8_u8(uint8x16_t a)16286 int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
16287   return vreinterpretq_s8_u8(a);
16288 }
16289 
16290 // CHECK-LABEL: @test_vreinterpretq_s8_u16(
16291 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16292 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u16(uint16x8_t a)16293 int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
16294   return vreinterpretq_s8_u16(a);
16295 }
16296 
16297 // CHECK-LABEL: @test_vreinterpretq_s8_u32(
16298 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16299 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u32(uint32x4_t a)16300 int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
16301   return vreinterpretq_s8_u32(a);
16302 }
16303 
16304 // CHECK-LABEL: @test_vreinterpretq_s8_u64(
16305 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16306 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u64(uint64x2_t a)16307 int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
16308   return vreinterpretq_s8_u64(a);
16309 }
16310 
16311 // CHECK-LABEL: @test_vreinterpretq_s8_f16(
16312 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
16313 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f16(float16x8_t a)16314 int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
16315   return vreinterpretq_s8_f16(a);
16316 }
16317 
16318 // CHECK-LABEL: @test_vreinterpretq_s8_f32(
16319 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
16320 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f32(float32x4_t a)16321 int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
16322   return vreinterpretq_s8_f32(a);
16323 }
16324 
16325 // CHECK-LABEL: @test_vreinterpretq_s8_f64(
16326 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
16327 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f64(float64x2_t a)16328 int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
16329   return vreinterpretq_s8_f64(a);
16330 }
16331 
16332 // CHECK-LABEL: @test_vreinterpretq_s8_p8(
16333 // CHECK:   ret <16 x i8> %a
test_vreinterpretq_s8_p8(poly8x16_t a)16334 int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
16335   return vreinterpretq_s8_p8(a);
16336 }
16337 
16338 // CHECK-LABEL: @test_vreinterpretq_s8_p16(
16339 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16340 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p16(poly16x8_t a)16341 int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
16342   return vreinterpretq_s8_p16(a);
16343 }
16344 
16345 // CHECK-LABEL: @test_vreinterpretq_s8_p64(
16346 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16347 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p64(poly64x2_t a)16348 int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
16349   return vreinterpretq_s8_p64(a);
16350 }
16351 
16352 // CHECK-LABEL: @test_vreinterpretq_s16_s8(
16353 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16354 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s8(int8x16_t a)16355 int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
16356   return vreinterpretq_s16_s8(a);
16357 }
16358 
16359 // CHECK-LABEL: @test_vreinterpretq_s16_s32(
16360 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16361 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s32(int32x4_t a)16362 int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
16363   return vreinterpretq_s16_s32(a);
16364 }
16365 
16366 // CHECK-LABEL: @test_vreinterpretq_s16_s64(
16367 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16368 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s64(int64x2_t a)16369 int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
16370   return vreinterpretq_s16_s64(a);
16371 }
16372 
16373 // CHECK-LABEL: @test_vreinterpretq_s16_u8(
16374 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16375 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u8(uint8x16_t a)16376 int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
16377   return vreinterpretq_s16_u8(a);
16378 }
16379 
16380 // CHECK-LABEL: @test_vreinterpretq_s16_u16(
16381 // CHECK:   ret <8 x i16> %a
test_vreinterpretq_s16_u16(uint16x8_t a)16382 int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
16383   return vreinterpretq_s16_u16(a);
16384 }
16385 
16386 // CHECK-LABEL: @test_vreinterpretq_s16_u32(
16387 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16388 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u32(uint32x4_t a)16389 int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
16390   return vreinterpretq_s16_u32(a);
16391 }
16392 
16393 // CHECK-LABEL: @test_vreinterpretq_s16_u64(
16394 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16395 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u64(uint64x2_t a)16396 int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
16397   return vreinterpretq_s16_u64(a);
16398 }
16399 
16400 // CHECK-LABEL: @test_vreinterpretq_s16_f16(
16401 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
16402 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f16(float16x8_t a)16403 int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
16404   return vreinterpretq_s16_f16(a);
16405 }
16406 
16407 // CHECK-LABEL: @test_vreinterpretq_s16_f32(
16408 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
16409 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f32(float32x4_t a)16410 int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
16411   return vreinterpretq_s16_f32(a);
16412 }
16413 
16414 // CHECK-LABEL: @test_vreinterpretq_s16_f64(
16415 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
16416 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f64(float64x2_t a)16417 int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
16418   return vreinterpretq_s16_f64(a);
16419 }
16420 
// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK:   ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p64(
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK:   ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p64(
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s64(
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u64(
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}

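// Scalar floating-point absolute difference: vabds_f32/vabdd_f64 map onto the
// AArch64 FABD scalar instruction through the llvm.aarch64.sisd.fabd intrinsic.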
// CHECK-LABEL: @test_vabds_f32(
// CHECK:   [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK:   ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK:   [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK:   ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}

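// vuqadd[q]_* implement SUQADD: the unsigned operand is accumulated into the
// signed one with signed saturation, via llvm.aarch64.neon.suqadd.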
// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK: entry:
// CHECK-NEXT:  [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT:  ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT:  ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT:  ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT:  ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT:  ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT:  ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}

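// vsqadd[q]_* implement the mirror-image USQADD: the signed operand is
// accumulated into the unsigned one with unsigned saturation, via
// llvm.aarch64.neon.usqadd.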
// CHECK-LABEL: @test_vsqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: @test_vsqadd_u8(
// CHECK:   [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK:   [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vsqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}

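// Unary ops on 64-bit d-register vectors: vabs/vqabs/vqneg lower to dedicated
// aarch64.neon intrinsics, while vneg is expressed as a plain IR subtraction
// from zero.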
// CHECK-LABEL: @test_vabs_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
// CHECK:   ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: @test_vqabs_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQABS_V1_I]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: @test_vqneg_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQNEG_V1_I]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: @test_vneg_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}

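// Across-lanes floating-point reductions: vaddv/vmaxv/vminv and the
// NaN-suppressing vmaxnmv/vminnmv variants collapse a whole vector to one
// scalar through the corresponding llvm.aarch64.neon.f*v intrinsic.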
// CHECK-LABEL: @test_vaddv_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
// CHECK:   ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: @test_vmaxv_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: @test_vmaxvq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: @test_vminv_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: @test_vminvq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmvq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmv_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: @test_vminnmvq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: @test_vminnmv_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}

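// vpaddq adds adjacent pairs of lanes across the concatenation of both
// operands (ADDP). For a single 2-lane vector, the scalar vpaddd form sums
// the two lanes, which is why it is emitted as an across-vector uaddv here.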
// CHECK-LABEL: @test_vpaddq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: @test_vpaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: @test_vpaddd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: @test_vaddvq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: @test_vaddvq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}

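// Plain float64x1_t arithmetic needs no target intrinsic: it lowers
// directly to the corresponding <1 x double> IR instruction.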
// CHECK-LABEL: @test_vadd_f64(
// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK:   ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}

// CHECK-LABEL: @test_vmul_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK:   ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}

// CHECK-LABEL: @test_vdiv_f64(
// CHECK:   [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK:   ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}

// CHECK-LABEL: @test_vmla_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK:   ret <1 x double> [[ADD_I]]
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}

// CHECK-LABEL: @test_vmls_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}

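// vmla/vmls above are a separate fmul + fadd/fsub (two roundings). The
// vfma/vfms forms below emit a single fused @llvm.fma call; vfms negates
// the second operand first, computing a - b*c with one rounding.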
// CHECK-LABEL: @test_vfma_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
// CHECK:   ret <1 x double> [[TMP3]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}

// CHECK-LABEL: @test_vfms_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
// CHECK:   ret <1 x double> [[TMP3]]
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}

// CHECK-LABEL: @test_vsub_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, %b
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}

// CHECK-LABEL: @test_vabd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VABD2_I]]
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}

// CHECK-LABEL: @test_vmax_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMAX2_I]]
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}

// CHECK-LABEL: @test_vmin_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMIN2_I]]
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}

// CHECK-LABEL: @test_vmaxnm_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMAXNM2_I]]
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMINNM2_I]]
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}

// CHECK-LABEL: @test_vabs_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VABS1_I]]
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}

// CHECK-LABEL: @test_vneg_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}

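// Float-to-int conversions: the plain vcvt forms round toward zero and
// lower to fptosi/fptoui, while the directed forms stay intrinsics:
// n = to nearest (ties to even), p = toward +Inf, m = toward -Inf,
// a = to nearest (ties away from zero). Hypothetical example, not part of
// the FileCheck run:
//   vcvtn_s64_f64(vdup_n_f64(2.5)) == 2,  vcvta_s64_f64(vdup_n_f64(2.5)) == 3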
// CHECK-LABEL: @test_vcvt_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP1]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP1]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTN1_I]]
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTN1_I]]
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTP1_I]]
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTP1_I]]
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTM1_I]]
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTM1_I]]
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}

// CHECK-LABEL: @test_vcvta_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTA1_I]]
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}

// CHECK-LABEL: @test_vcvta_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTA1_I]]
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}

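// Integer-to-float conversions are likewise plain sitofp/uitofp. The _n
// variants below convert to/from fixed-point: the immediate is the number
// of fraction bits (nominally 1..64 for 64-bit elements), so for example
// vcvt_n_s64_f64(x, n) computes trunc(x * 2^n) per lane.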
// CHECK-LABEL: @test_vcvt_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}

// CHECK-LABEL: @test_vcvt_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}

// CHECK-LABEL: @test_vcvt_n_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x i64> [[VCVT_N1]]
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x i64> [[VCVT_N1]]
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}

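// The vrnd* family maps onto generic LLVM rounding intrinsics:
//   vrndn -> frintn (to nearest, ties to even), vrnda -> llvm.round (ties away)
//   vrndp -> llvm.ceil, vrndm -> llvm.floor, vrnd -> llvm.trunc
//   vrndx -> llvm.rint (current mode, may raise inexact)
//   vrndi -> llvm.nearbyint (current mode, never raises inexact)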
// CHECK-LABEL: @test_vrndn_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}

// CHECK-LABEL: @test_vrnda_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDA1_I]]
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}

// CHECK-LABEL: @test_vrndp_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDP1_I]]
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}

// CHECK-LABEL: @test_vrndm_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDM1_I]]
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}

// CHECK-LABEL: @test_vrndx_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDX1_I]]
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}

// CHECK-LABEL: @test_vrnd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDZ1_I]]
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}

// CHECK-LABEL: @test_vrndi_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDI1_I]]
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}

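// vrsqrte/vrecpe return only an initial estimate; vrsqrts/vrecps compute
// the matching Newton-Raphson step (FRECPS(a, b) = 2 - a*b). A typical
// refinement, shown only as a usage sketch and not checked by FileCheck:
//   float64x1_t e = vrecpe_f64(x);
//   e = vmul_f64(e, vrecps_f64(x, e));   // e *= (2 - x*e)
// vsqrt, by contrast, is exact and lowers straight to llvm.sqrt.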
// CHECK-LABEL: @test_vrsqrte_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRSQRTE_V1_I]]
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}

// CHECK-LABEL: @test_vrecpe_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRECPE_V1_I]]
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}

// CHECK-LABEL: @test_vsqrt_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VSQRT_I]]
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}

// CHECK-LABEL: @test_vrecps_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VRECPS_V2_I]]
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrts_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK:   ret <1 x double> [[VRSQRTS_V2_I]]
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}

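// Integer across-vector reductions mirror the float ones above; the
// widening vaddlv forms return a 64-bit scalar so the sum of the 32-bit
// lanes cannot overflow.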
// CHECK-LABEL: @test_vminv_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}

// CHECK-LABEL: @test_vminv_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}

// CHECK-LABEL: @test_vmaxv_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}

// CHECK-LABEL: @test_vmaxv_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}

// CHECK-LABEL: @test_vaddv_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}

// CHECK-LABEL: @test_vaddv_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}

// CHECK-LABEL: @test_vaddlv_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
// CHECK:   ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}

// CHECK-LABEL: @test_vaddlv_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
// CHECK:   ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
}