// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:     -fallow-half-arguments-and-returns -S -disable-O0-optnone \
// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>
10 
// vadd: element-wise wrapping integer add / IEEE fp add on 64-bit vectors.
// CHECK-LABEL: @test_vadd_s8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}
73 
// vaddq: element-wise add on full 128-bit "q" vectors.
// CHECK-LABEL: @test_vaddq_s8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f64(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}
143 
// vsub: element-wise wrapping integer subtract / IEEE fp subtract, 64-bit vectors.
// CHECK-LABEL: @test_vsub_s8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vsub_s16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vsub_s32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vsub_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: @test_vsub_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vsub_u16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vsub_u32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}
206 
// vsubq: element-wise subtract on full 128-bit "q" vectors.
// CHECK-LABEL: @test_vsubq_s8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}
276 
// vmul: element-wise wrapping integer multiply / IEEE fp multiply, 64-bit vectors.
// CHECK-LABEL: @test_vmul_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: @test_vmul_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: @test_vmul_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: @test_vmul_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}

// CHECK-LABEL: @test_vmul_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: @test_vmul_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: @test_vmul_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}
325 
// vmulq: element-wise multiply on full 128-bit "q" vectors.
// CHECK-LABEL: @test_vmulq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}
381 
// Polynomial multiply lowers to the target-specific pmul intrinsic, not plain mul.
// CHECK-LABEL: @test_vmul_p8(
// CHECK:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  return vmul_p8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_p8(
// CHECK:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vmulq_p8(v1, v2);
}
395 
// vmla: multiply-accumulate (v1 + v2 * v3), 64-bit vectors.
// CHECK-LABEL: @test_vmla_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// The explicit cast to int8x8_t exercises the vector bitcast on the result.
// CHECK-LABEL: @test_vmla_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}
452 
// vmlaq: multiply-accumulate (v1 + v2 * v3) on full 128-bit "q" vectors.
// CHECK-LABEL: @test_vmlaq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}
516 
// vmls: multiply-subtract (v1 - v2 * v3), 64-bit vectors.
// CHECK-LABEL: @test_vmls_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// The explicit cast to int8x8_t exercises the vector bitcast on the result.
// CHECK-LABEL: @test_vmls_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}
573 
// vmlsq: multiply-subtract (v1 - v2 * v3) on full 128-bit "q" vectors.
// CHECK-LABEL: @test_vmlsq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}
637 
// vfma: fused multiply-add, lowered to the generic llvm.fma intrinsic
// with the accumulator (v1) as the addend operand.
// CHECK-LABEL: @test_vfma_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfma_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
// CHECK:   ret <2 x double> [[TMP3]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}
667 
// vfms: fused multiply-subtract, lowered as llvm.fma with the first
// multiplicand negated (fneg) rather than a separate subtract.
// CHECK-LABEL: @test_vfms_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <2 x float> %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <4 x float> %v2
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f64(
// CHECK:   [[SUB_I:%.*]] = fneg <2 x double> %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
// CHECK:   ret <2 x double> [[TMP3]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}
700 
// vdiv: IEEE fp division (AArch64-only NEON intrinsic), lowered to plain fdiv.
// CHECK-LABEL: @test_vdivq_f64(
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}

// CHECK-LABEL: @test_vdivq_f32(
// CHECK:   [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}

// CHECK-LABEL: @test_vdiv_f32(
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}
721 
// vaba: absolute-difference-and-accumulate (v1 + |v2 - v3|), lowered to the
// target sabd/uabd intrinsics followed by an add.
// CHECK-LABEL: @test_vaba_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}
767 
768 // CHECK-LABEL: @test_vaba_u32(
769 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
770 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
771 // CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
772 // CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
773 // CHECK:   ret <2 x i32> [[ADD_I]]
test_vaba_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)774 uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
775   return vaba_u32(v1, v2, v3);
776 }
777 
778 // CHECK-LABEL: @test_vabaq_s8(
779 // CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
780 // CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
781 // CHECK:   ret <16 x i8> [[ADD_I]]
test_vabaq_s8(int8x16_t v1,int8x16_t v2,int8x16_t v3)782 int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
783   return vabaq_s8(v1, v2, v3);
784 }
785 
786 // CHECK-LABEL: @test_vabaq_s16(
787 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
788 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
789 // CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
790 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
791 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vabaq_s16(int16x8_t v1,int16x8_t v2,int16x8_t v3)792 int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
793   return vabaq_s16(v1, v2, v3);
794 }
795 
796 // CHECK-LABEL: @test_vabaq_s32(
797 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
798 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
799 // CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
800 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
801 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vabaq_s32(int32x4_t v1,int32x4_t v2,int32x4_t v3)802 int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
803   return vabaq_s32(v1, v2, v3);
804 }
805 
806 // CHECK-LABEL: @test_vabaq_u8(
807 // CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
808 // CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
809 // CHECK:   ret <16 x i8> [[ADD_I]]
test_vabaq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)810 uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
811   return vabaq_u8(v1, v2, v3);
812 }
813 
814 // CHECK-LABEL: @test_vabaq_u16(
815 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
816 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
817 // CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
818 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
819 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vabaq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)820 uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
821   return vabaq_u16(v1, v2, v3);
822 }
823 
824 // CHECK-LABEL: @test_vabaq_u32(
825 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
826 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
827 // CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
828 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
829 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vabaq_u32(uint32x4_t v1,uint32x4_t v2,uint32x4_t v3)830 uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
831   return vabaq_u32(v1, v2, v3);
832 }
833 
834 // CHECK-LABEL: @test_vabd_s8(
835 // CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
836 // CHECK:   ret <8 x i8> [[VABD_I]]
test_vabd_s8(int8x8_t v1,int8x8_t v2)837 int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
838   return vabd_s8(v1, v2);
839 }
840 
841 // CHECK-LABEL: @test_vabd_s16(
842 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
843 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
844 // CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
845 // CHECK:   ret <4 x i16> [[VABD2_I]]
test_vabd_s16(int16x4_t v1,int16x4_t v2)846 int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
847   return vabd_s16(v1, v2);
848 }
849 
850 // CHECK-LABEL: @test_vabd_s32(
851 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
852 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
853 // CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
854 // CHECK:   ret <2 x i32> [[VABD2_I]]
test_vabd_s32(int32x2_t v1,int32x2_t v2)855 int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
856   return vabd_s32(v1, v2);
857 }
858 
859 // CHECK-LABEL: @test_vabd_u8(
860 // CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
861 // CHECK:   ret <8 x i8> [[VABD_I]]
test_vabd_u8(uint8x8_t v1,uint8x8_t v2)862 uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
863   return vabd_u8(v1, v2);
864 }
865 
866 // CHECK-LABEL: @test_vabd_u16(
867 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
868 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
869 // CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
870 // CHECK:   ret <4 x i16> [[VABD2_I]]
test_vabd_u16(uint16x4_t v1,uint16x4_t v2)871 uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
872   return vabd_u16(v1, v2);
873 }
874 
875 // CHECK-LABEL: @test_vabd_u32(
876 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
877 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
878 // CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
879 // CHECK:   ret <2 x i32> [[VABD2_I]]
test_vabd_u32(uint32x2_t v1,uint32x2_t v2)880 uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
881   return vabd_u32(v1, v2);
882 }
883 
884 // CHECK-LABEL: @test_vabd_f32(
885 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
886 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
887 // CHECK:   [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
888 // CHECK:   ret <2 x float> [[VABD2_I]]
test_vabd_f32(float32x2_t v1,float32x2_t v2)889 float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
890   return vabd_f32(v1, v2);
891 }
892 
893 // CHECK-LABEL: @test_vabdq_s8(
894 // CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
895 // CHECK:   ret <16 x i8> [[VABD_I]]
test_vabdq_s8(int8x16_t v1,int8x16_t v2)896 int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
897   return vabdq_s8(v1, v2);
898 }
899 
900 // CHECK-LABEL: @test_vabdq_s16(
901 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
902 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
903 // CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
904 // CHECK:   ret <8 x i16> [[VABD2_I]]
test_vabdq_s16(int16x8_t v1,int16x8_t v2)905 int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
906   return vabdq_s16(v1, v2);
907 }
908 
909 // CHECK-LABEL: @test_vabdq_s32(
910 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
911 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
912 // CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
913 // CHECK:   ret <4 x i32> [[VABD2_I]]
test_vabdq_s32(int32x4_t v1,int32x4_t v2)914 int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
915   return vabdq_s32(v1, v2);
916 }
917 
918 // CHECK-LABEL: @test_vabdq_u8(
919 // CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
920 // CHECK:   ret <16 x i8> [[VABD_I]]
test_vabdq_u8(uint8x16_t v1,uint8x16_t v2)921 uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
922   return vabdq_u8(v1, v2);
923 }
924 
925 // CHECK-LABEL: @test_vabdq_u16(
926 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
927 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
928 // CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
929 // CHECK:   ret <8 x i16> [[VABD2_I]]
test_vabdq_u16(uint16x8_t v1,uint16x8_t v2)930 uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
931   return vabdq_u16(v1, v2);
932 }
933 
934 // CHECK-LABEL: @test_vabdq_u32(
935 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
936 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
937 // CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
938 // CHECK:   ret <4 x i32> [[VABD2_I]]
test_vabdq_u32(uint32x4_t v1,uint32x4_t v2)939 uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
940   return vabdq_u32(v1, v2);
941 }
942 
943 // CHECK-LABEL: @test_vabdq_f32(
944 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
945 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
946 // CHECK:   [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
947 // CHECK:   ret <4 x float> [[VABD2_I]]
test_vabdq_f32(float32x4_t v1,float32x4_t v2)948 float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
949   return vabdq_f32(v1, v2);
950 }
951 
952 // CHECK-LABEL: @test_vabdq_f64(
953 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
954 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
955 // CHECK:   [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
956 // CHECK:   ret <2 x double> [[VABD2_I]]
test_vabdq_f64(float64x2_t v1,float64x2_t v2)957 float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
958   return vabdq_f64(v1, v2);
959 }
960 
961 // CHECK-LABEL: @test_vbsl_s8(
962 // CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
963 // CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
964 // CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
965 // CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
966 // CHECK:   ret <8 x i8> [[VBSL2_I]]
test_vbsl_s8(uint8x8_t v1,int8x8_t v2,int8x8_t v3)967 int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
968   return vbsl_s8(v1, v2, v3);
969 }
970 
971 // CHECK-LABEL: @test_vbsl_s16(
972 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
973 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
974 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
975 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
976 // CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
977 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
978 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
979 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
980 // CHECK:   ret <8 x i8> [[TMP4]]
test_vbsl_s16(uint16x4_t v1,int16x4_t v2,int16x4_t v3)981 int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
982   return (int8x8_t)vbsl_s16(v1, v2, v3);
983 }
984 
985 // CHECK-LABEL: @test_vbsl_s32(
986 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
987 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
988 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
989 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
990 // CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
991 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
992 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
993 // CHECK:   ret <2 x i32> [[VBSL5_I]]
test_vbsl_s32(uint32x2_t v1,int32x2_t v2,int32x2_t v3)994 int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
995   return vbsl_s32(v1, v2, v3);
996 }
997 
998 // CHECK-LABEL: @test_vbsl_s64(
999 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1000 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1001 // CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1002 // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
1003 // CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
1004 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
1005 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1006 // CHECK:   ret <1 x i64> [[VBSL5_I]]
test_vbsl_s64(uint64x1_t v1,int64x1_t v2,int64x1_t v3)1007 int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
1008   return vbsl_s64(v1, v2, v3);
1009 }
1010 
1011 // CHECK-LABEL: @test_vbsl_u8(
1012 // CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1013 // CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1014 // CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1015 // CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1016 // CHECK:   ret <8 x i8> [[VBSL2_I]]
test_vbsl_u8(uint8x8_t v1,uint8x8_t v2,uint8x8_t v3)1017 uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
1018   return vbsl_u8(v1, v2, v3);
1019 }
1020 
1021 // CHECK-LABEL: @test_vbsl_u16(
1022 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1023 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1024 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1025 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
1026 // CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
1027 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
1028 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1029 // CHECK:   ret <4 x i16> [[VBSL5_I]]
test_vbsl_u16(uint16x4_t v1,uint16x4_t v2,uint16x4_t v3)1030 uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
1031   return vbsl_u16(v1, v2, v3);
1032 }
1033 
1034 // CHECK-LABEL: @test_vbsl_u32(
1035 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1036 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1037 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
1038 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
1039 // CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
1040 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
1041 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1042 // CHECK:   ret <2 x i32> [[VBSL5_I]]
test_vbsl_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)1043 uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
1044   return vbsl_u32(v1, v2, v3);
1045 }
1046 
1047 // CHECK-LABEL: @test_vbsl_u64(
1048 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1049 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1050 // CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1051 // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
1052 // CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
1053 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
1054 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1055 // CHECK:   ret <1 x i64> [[VBSL5_I]]
test_vbsl_u64(uint64x1_t v1,uint64x1_t v2,uint64x1_t v3)1056 uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
1057   return vbsl_u64(v1, v2, v3);
1058 }
1059 
1060 // CHECK-LABEL: @test_vbsl_f32(
1061 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1062 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1063 // CHECK:   [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
1064 // CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1065 // CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
1066 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]]
1067 // CHECK:   [[TMP4:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
1068 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
1069 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1070 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
1071 // CHECK:   ret <2 x float> [[TMP5]]
test_vbsl_f32(uint32x2_t v1,float32x2_t v2,float32x2_t v3)1072 float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
1073   return vbsl_f32(v1, v2, v3);
1074 }
1075 
1076 // CHECK-LABEL: @test_vbsl_f64(
1077 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1078 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
1079 // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
1080 // CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1081 // CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1082 // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
1083 // CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
1084 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1085 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1086 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
1087 // CHECK:   ret <1 x double> [[TMP4]]
test_vbsl_f64(uint64x1_t v1,float64x1_t v2,float64x1_t v3)1088 float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
1089   return vbsl_f64(v1, v2, v3);
1090 }
1091 
1092 // CHECK-LABEL: @test_vbsl_p8(
1093 // CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1094 // CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1095 // CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1096 // CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1097 // CHECK:   ret <8 x i8> [[VBSL2_I]]
test_vbsl_p8(uint8x8_t v1,poly8x8_t v2,poly8x8_t v3)1098 poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
1099   return vbsl_p8(v1, v2, v3);
1100 }
1101 
1102 // CHECK-LABEL: @test_vbsl_p16(
1103 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1104 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1105 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1106 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
1107 // CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
1108 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
1109 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1110 // CHECK:   ret <4 x i16> [[VBSL5_I]]
test_vbsl_p16(uint16x4_t v1,poly16x4_t v2,poly16x4_t v3)1111 poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
1112   return vbsl_p16(v1, v2, v3);
1113 }
1114 
1115 // CHECK-LABEL: @test_vbslq_s8(
1116 // CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1117 // CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1118 // CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1119 // CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1120 // CHECK:   ret <16 x i8> [[VBSL2_I]]
test_vbslq_s8(uint8x16_t v1,int8x16_t v2,int8x16_t v3)1121 int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
1122   return vbslq_s8(v1, v2, v3);
1123 }
1124 
1125 // CHECK-LABEL: @test_vbslq_s16(
1126 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1127 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1128 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1129 // CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
1130 // CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1131 // CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
1132 // CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1133 // CHECK:   ret <8 x i16> [[VBSL5_I]]
test_vbslq_s16(uint16x8_t v1,int16x8_t v2,int16x8_t v3)1134 int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
1135   return vbslq_s16(v1, v2, v3);
1136 }
1137 
1138 // CHECK-LABEL: @test_vbslq_s32(
1139 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1140 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1141 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
1142 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
1143 // CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
1144 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
1145 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1146 // CHECK:   ret <4 x i32> [[VBSL5_I]]
test_vbslq_s32(uint32x4_t v1,int32x4_t v2,int32x4_t v3)1147 int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
1148   return vbslq_s32(v1, v2, v3);
1149 }
1150 
1151 // CHECK-LABEL: @test_vbslq_s64(
1152 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1153 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1154 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
1155 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
1156 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1157 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
1158 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1159 // CHECK:   ret <2 x i64> [[VBSL5_I]]
test_vbslq_s64(uint64x2_t v1,int64x2_t v2,int64x2_t v3)1160 int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
1161   return vbslq_s64(v1, v2, v3);
1162 }
1163 
1164 // CHECK-LABEL: @test_vbslq_u8(
1165 // CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1166 // CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1167 // CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1168 // CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1169 // CHECK:   ret <16 x i8> [[VBSL2_I]]
test_vbslq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)1170 uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
1171   return vbslq_u8(v1, v2, v3);
1172 }
1173 
1174 // CHECK-LABEL: @test_vbslq_u16(
1175 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1176 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1177 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1178 // CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
1179 // CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1180 // CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
1181 // CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1182 // CHECK:   ret <8 x i16> [[VBSL5_I]]
test_vbslq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)1183 uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
1184   return vbslq_u16(v1, v2, v3);
1185 }
1186 
1187 // CHECK-LABEL: @test_vbslq_u32(
1188 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1189 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1190 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
1191 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
1192 // CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
1193 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
1194 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1195 // CHECK:   ret <4 x i32> [[VBSL5_I]]
test_vbslq_u32(uint32x4_t v1,int32x4_t v2,int32x4_t v3)1196 int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
1197   return vbslq_s32(v1, v2, v3);
1198 }
1199 
1200 // CHECK-LABEL: @test_vbslq_u64(
1201 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1202 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1203 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
1204 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
1205 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1206 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
1207 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1208 // CHECK:   ret <2 x i64> [[VBSL5_I]]
test_vbslq_u64(uint64x2_t v1,uint64x2_t v2,uint64x2_t v3)1209 uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
1210   return vbslq_u64(v1, v2, v3);
1211 }
1212 
1213 // CHECK-LABEL: @test_vbslq_f32(
1214 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1215 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1216 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
1217 // CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1218 // CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1219 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
1220 // CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
1221 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
1222 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1223 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
1224 // CHECK:   ret <4 x float> [[TMP4]]
test_vbslq_f32(uint32x4_t v1,float32x4_t v2,float32x4_t v3)1225 float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
1226   return vbslq_f32(v1, v2, v3);
1227 }
1228 
1229 // CHECK-LABEL: @test_vbslq_p8(
1230 // CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1231 // CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1232 // CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1233 // CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1234 // CHECK:   ret <16 x i8> [[VBSL2_I]]
test_vbslq_p8(uint8x16_t v1,poly8x16_t v2,poly8x16_t v3)1235 poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
1236   return vbslq_p8(v1, v2, v3);
1237 }
1238 
1239 // CHECK-LABEL: @test_vbslq_p16(
1240 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1241 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1242 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1243 // CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
1244 // CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1245 // CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
1246 // CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1247 // CHECK:   ret <8 x i16> [[VBSL5_I]]
test_vbslq_p16(uint16x8_t v1,poly16x8_t v2,poly16x8_t v3)1248 poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
1249   return vbslq_p16(v1, v2, v3);
1250 }
1251 
1252 // CHECK-LABEL: @test_vbslq_f64(
1253 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1254 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1255 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
1256 // CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1257 // CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
1258 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
1259 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1260 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
1261 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1262 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
1263 // CHECK:   ret <2 x double> [[TMP4]]
test_vbslq_f64(uint64x2_t v1,float64x2_t v2,float64x2_t v3)1264 float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
1265   return vbslq_f64(v1, v2, v3);
1266 }
1267 
1268 // CHECK-LABEL: @test_vrecps_f32(
1269 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1270 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1271 // CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
1272 // CHECK:   ret <2 x float> [[VRECPS_V2_I]]
test_vrecps_f32(float32x2_t v1,float32x2_t v2)1273 float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
1274   return vrecps_f32(v1, v2);
1275 }
1276 
1277 // CHECK-LABEL: @test_vrecpsq_f32(
1278 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1279 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1280 // CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
1281 // CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
1282 // CHECK:   ret <4 x float> [[VRECPSQ_V2_I]]
test_vrecpsq_f32(float32x4_t v1,float32x4_t v2)1283 float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
1284   return vrecpsq_f32(v1, v2);
1285 }
1286 
1287 // CHECK-LABEL: @test_vrecpsq_f64(
1288 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1289 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1290 // CHECK:   [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
1291 // CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
1292 // CHECK:   ret <2 x double> [[VRECPSQ_V2_I]]
test_vrecpsq_f64(float64x2_t v1,float64x2_t v2)1293 float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
1294   return vrecpsq_f64(v1, v2);
1295 }
1296 
1297 // CHECK-LABEL: @test_vrsqrts_f32(
1298 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1299 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1300 // CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
1301 // CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
1302 // CHECK:   ret <2 x float> [[VRSQRTS_V2_I]]
test_vrsqrts_f32(float32x2_t v1,float32x2_t v2)1303 float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
1304   return vrsqrts_f32(v1, v2);
1305 }
1306 
1307 // CHECK-LABEL: @test_vrsqrtsq_f32(
1308 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1309 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1310 // CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
1311 // CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
1312 // CHECK:   ret <4 x float> [[VRSQRTSQ_V2_I]]
test_vrsqrtsq_f32(float32x4_t v1,float32x4_t v2)1313 float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
1314   return vrsqrtsq_f32(v1, v2);
1315 }
1316 
1317 // CHECK-LABEL: @test_vrsqrtsq_f64(
1318 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1319 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1320 // CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
1321 // CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
1322 // CHECK:   ret <2 x double> [[VRSQRTSQ_V2_I]]
test_vrsqrtsq_f64(float64x2_t v1,float64x2_t v2)1323 float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
1324   return vrsqrtsq_f64(v1, v2);
1325 }
1326 
// NOTE(review): CodeGen test for the NEON floating-point *absolute* compare
// intrinsics (vcage/vcagt = |a| >= |b| / |a| > |b|), lowered to the
// llvm.aarch64.neon.facge/facgt intrinsics. Code and CHECK lines must stay
// in sync with clang's emitted IR.
1327 // CHECK-LABEL: @test_vcage_f32(
1328 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1329 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1330 // CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1331 // CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
test_vcage_f32(float32x2_t v1,float32x2_t v2)1332 uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
1333   return vcage_f32(v1, v2);
1334 }
1335 
1336 // CHECK-LABEL: @test_vcage_f64(
1337 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1338 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1339 // CHECK:   [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1340 // CHECK:   ret <1 x i64> [[VCAGE_V2_I]]
test_vcage_f64(float64x1_t a,float64x1_t b)1341 uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
1342   return vcage_f64(a, b);
1343 }
1344 
1345 // CHECK-LABEL: @test_vcageq_f32(
1346 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1347 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1348 // CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1349 // CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
test_vcageq_f32(float32x4_t v1,float32x4_t v2)1350 uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
1351   return vcageq_f32(v1, v2);
1352 }
1353 
1354 // CHECK-LABEL: @test_vcageq_f64(
1355 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1356 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1357 // CHECK:   [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1358 // CHECK:   ret <2 x i64> [[VCAGEQ_V2_I]]
test_vcageq_f64(float64x2_t v1,float64x2_t v2)1359 uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
1360   return vcageq_f64(v1, v2);
1361 }
1362 
1363 // CHECK-LABEL: @test_vcagt_f32(
1364 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1365 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1366 // CHECK:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1367 // CHECK:   ret <2 x i32> [[VCAGT_V2_I]]
test_vcagt_f32(float32x2_t v1,float32x2_t v2)1368 uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
1369   return vcagt_f32(v1, v2);
1370 }
1371 
1372 // CHECK-LABEL: @test_vcagt_f64(
1373 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1374 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1375 // CHECK:   [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1376 // CHECK:   ret <1 x i64> [[VCAGT_V2_I]]
test_vcagt_f64(float64x1_t a,float64x1_t b)1377 uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
1378   return vcagt_f64(a, b);
1379 }
1380 
1381 // CHECK-LABEL: @test_vcagtq_f32(
1382 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1383 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1384 // CHECK:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1385 // CHECK:   ret <4 x i32> [[VCAGTQ_V2_I]]
test_vcagtq_f32(float32x4_t v1,float32x4_t v2)1386 uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
1387   return vcagtq_f32(v1, v2);
1388 }
1389 
1390 // CHECK-LABEL: @test_vcagtq_f64(
1391 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1392 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1393 // CHECK:   [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1394 // CHECK:   ret <2 x i64> [[VCAGTQ_V2_I]]
test_vcagtq_f64(float64x2_t v1,float64x2_t v2)1395 uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
1396   return vcagtq_f64(v1, v2);
1397 }
1398 
// vcale/vcalt (|a| <= |b|, |a| < |b|) have no dedicated intrinsic: clang
// lowers them to facge/facgt with the operands swapped, as the CHECK lines
// below verify (%v2/%b passed first).
1399 // CHECK-LABEL: @test_vcale_f32(
1400 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1401 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1402 // CHECK:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1403 // CHECK:   ret <2 x i32> [[VCALE_V2_I]]
test_vcale_f32(float32x2_t v1,float32x2_t v2)1404 uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
1405   return vcale_f32(v1, v2);
1406   // Using registers other than v0, v1 are possible, but would be odd.
1407 }
1408 
1409 // CHECK-LABEL: @test_vcale_f64(
1410 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1411 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1412 // CHECK:   [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1413 // CHECK:   ret <1 x i64> [[VCALE_V2_I]]
test_vcale_f64(float64x1_t a,float64x1_t b)1414 uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
1415   return vcale_f64(a, b);
1416 }
1417 
1418 // CHECK-LABEL: @test_vcaleq_f32(
1419 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1420 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1421 // CHECK:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1422 // CHECK:   ret <4 x i32> [[VCALEQ_V2_I]]
test_vcaleq_f32(float32x4_t v1,float32x4_t v2)1423 uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
1424   return vcaleq_f32(v1, v2);
1425   // Using registers other than v0, v1 are possible, but would be odd.
1426 }
1427 
1428 // CHECK-LABEL: @test_vcaleq_f64(
1429 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1430 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1431 // CHECK:   [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1432 // CHECK:   ret <2 x i64> [[VCALEQ_V2_I]]
test_vcaleq_f64(float64x2_t v1,float64x2_t v2)1433 uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
1434   return vcaleq_f64(v1, v2);
1435   // Using registers other than v0, v1 are possible, but would be odd.
1436 }
1437 
1438 // CHECK-LABEL: @test_vcalt_f32(
1439 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1440 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1441 // CHECK:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1442 // CHECK:   ret <2 x i32> [[VCALT_V2_I]]
test_vcalt_f32(float32x2_t v1,float32x2_t v2)1443 uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
1444   return vcalt_f32(v1, v2);
1445   // Using registers other than v0, v1 are possible, but would be odd.
1446 }
1447 
1448 // CHECK-LABEL: @test_vcalt_f64(
1449 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1450 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1451 // CHECK:   [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1452 // CHECK:   ret <1 x i64> [[VCALT_V2_I]]
test_vcalt_f64(float64x1_t a,float64x1_t b)1453 uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
1454   return vcalt_f64(a, b);
1455 }
1456 
1457 // CHECK-LABEL: @test_vcaltq_f32(
1458 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1459 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1460 // CHECK:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1461 // CHECK:   ret <4 x i32> [[VCALTQ_V2_I]]
test_vcaltq_f32(float32x4_t v1,float32x4_t v2)1462 uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
1463   return vcaltq_f32(v1, v2);
1464   // Using registers other than v0, v1 are possible, but would be odd.
1465 }
1466 
1467 // CHECK-LABEL: @test_vcaltq_f64(
1468 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1469 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1470 // CHECK:   [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1471 // CHECK:   ret <2 x i64> [[VCALTQ_V2_I]]
test_vcaltq_f64(float64x2_t v1,float64x2_t v2)1472 uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
1473   return vcaltq_f64(v1, v2);
1474   // Using registers other than v0, v1 are possible, but would be odd.
1475 }
1476 
// NOTE(review): CodeGen test for the NEON vtst* (bit test) intrinsics. The
// CHECK lines verify clang lowers them to plain IR rather than a target
// intrinsic: `and` the operands, `icmp ne` against zero, then `sext` the i1
// mask back to the element width. Code and CHECK lines must stay in sync
// with clang's emitted IR.
1477 // CHECK-LABEL: @test_vtst_s8(
1478 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1479 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1480 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1481 // CHECK:   ret <8 x i8> [[VTST_I]]
test_vtst_s8(int8x8_t v1,int8x8_t v2)1482 uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
1483   return vtst_s8(v1, v2);
1484 }
1485 
1486 // CHECK-LABEL: @test_vtst_s16(
1487 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1488 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1489 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1490 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1491 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1492 // CHECK:   ret <4 x i16> [[VTST_I]]
test_vtst_s16(int16x4_t v1,int16x4_t v2)1493 uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
1494   return vtst_s16(v1, v2);
1495 }
1496 
1497 // CHECK-LABEL: @test_vtst_s32(
1498 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1499 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1500 // CHECK:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1501 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1502 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1503 // CHECK:   ret <2 x i32> [[VTST_I]]
test_vtst_s32(int32x2_t v1,int32x2_t v2)1504 uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
1505   return vtst_s32(v1, v2);
1506 }
1507 
1508 // CHECK-LABEL: @test_vtst_u8(
1509 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1510 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1511 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1512 // CHECK:   ret <8 x i8> [[VTST_I]]
test_vtst_u8(uint8x8_t v1,uint8x8_t v2)1513 uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
1514   return vtst_u8(v1, v2);
1515 }
1516 
1517 // CHECK-LABEL: @test_vtst_u16(
1518 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1519 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1520 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1521 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1522 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1523 // CHECK:   ret <4 x i16> [[VTST_I]]
test_vtst_u16(uint16x4_t v1,uint16x4_t v2)1524 uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
1525   return vtst_u16(v1, v2);
1526 }
1527 
1528 // CHECK-LABEL: @test_vtst_u32(
1529 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1530 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1531 // CHECK:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1532 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1533 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1534 // CHECK:   ret <2 x i32> [[VTST_I]]
test_vtst_u32(uint32x2_t v1,uint32x2_t v2)1535 uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
1536   return vtst_u32(v1, v2);
1537 }
1538 
1539 // CHECK-LABEL: @test_vtstq_s8(
1540 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1541 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1542 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1543 // CHECK:   ret <16 x i8> [[VTST_I]]
test_vtstq_s8(int8x16_t v1,int8x16_t v2)1544 uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
1545   return vtstq_s8(v1, v2);
1546 }
1547 
1548 // CHECK-LABEL: @test_vtstq_s16(
1549 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1550 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1551 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1552 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1553 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1554 // CHECK:   ret <8 x i16> [[VTST_I]]
test_vtstq_s16(int16x8_t v1,int16x8_t v2)1555 uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
1556   return vtstq_s16(v1, v2);
1557 }
1558 
1559 // CHECK-LABEL: @test_vtstq_s32(
1560 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1561 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1562 // CHECK:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1563 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1564 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1565 // CHECK:   ret <4 x i32> [[VTST_I]]
test_vtstq_s32(int32x4_t v1,int32x4_t v2)1566 uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
1567   return vtstq_s32(v1, v2);
1568 }
1569 
1570 // CHECK-LABEL: @test_vtstq_u8(
1571 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1572 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1573 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1574 // CHECK:   ret <16 x i8> [[VTST_I]]
test_vtstq_u8(uint8x16_t v1,uint8x16_t v2)1575 uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
1576   return vtstq_u8(v1, v2);
1577 }
1578 
1579 // CHECK-LABEL: @test_vtstq_u16(
1580 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1581 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1582 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1583 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1584 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1585 // CHECK:   ret <8 x i16> [[VTST_I]]
test_vtstq_u16(uint16x8_t v1,uint16x8_t v2)1586 uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
1587   return vtstq_u16(v1, v2);
1588 }
1589 
1590 // CHECK-LABEL: @test_vtstq_u32(
1591 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1592 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1593 // CHECK:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1594 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1595 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1596 // CHECK:   ret <4 x i32> [[VTST_I]]
test_vtstq_u32(uint32x4_t v1,uint32x4_t v2)1597 uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
1598   return vtstq_u32(v1, v2);
1599 }
1600 
1601 // CHECK-LABEL: @test_vtstq_s64(
1602 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1603 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1604 // CHECK:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1605 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1606 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1607 // CHECK:   ret <2 x i64> [[VTST_I]]
test_vtstq_s64(int64x2_t v1,int64x2_t v2)1608 uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
1609   return vtstq_s64(v1, v2);
1610 }
1611 
1612 // CHECK-LABEL: @test_vtstq_u64(
1613 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1614 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1615 // CHECK:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1616 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1617 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1618 // CHECK:   ret <2 x i64> [[VTST_I]]
test_vtstq_u64(uint64x2_t v1,uint64x2_t v2)1619 uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
1620   return vtstq_u64(v1, v2);
1621 }
1622 
1623 // CHECK-LABEL: @test_vtst_p8(
1624 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1625 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1626 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1627 // CHECK:   ret <8 x i8> [[VTST_I]]
test_vtst_p8(poly8x8_t v1,poly8x8_t v2)1628 uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
1629   return vtst_p8(v1, v2);
1630 }
1631 
1632 // CHECK-LABEL: @test_vtst_p16(
1633 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1634 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1635 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1636 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1637 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1638 // CHECK:   ret <4 x i16> [[VTST_I]]
test_vtst_p16(poly16x4_t v1,poly16x4_t v2)1639 uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
1640   return vtst_p16(v1, v2);
1641 }
1642 
1643 // CHECK-LABEL: @test_vtstq_p8(
1644 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1645 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1646 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1647 // CHECK:   ret <16 x i8> [[VTST_I]]
test_vtstq_p8(poly8x16_t v1,poly8x16_t v2)1648 uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
1649   return vtstq_p8(v1, v2);
1650 }
1651 
1652 // CHECK-LABEL: @test_vtstq_p16(
1653 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1654 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1655 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1656 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1657 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1658 // CHECK:   ret <8 x i16> [[VTST_I]]
test_vtstq_p16(poly16x8_t v1,poly16x8_t v2)1659 uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
1660   return vtstq_p16(v1, v2);
1661 }
1662 
1663 // CHECK-LABEL: @test_vtst_s64(
1664 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1665 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1666 // CHECK:   [[TMP2:%.*]] = and <1 x i64> %a, %b
1667 // CHECK:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1668 // CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1669 // CHECK:   ret <1 x i64> [[VTST_I]]
test_vtst_s64(int64x1_t a,int64x1_t b)1670 uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
1671   return vtst_s64(a, b);
1672 }
1673 
1674 // CHECK-LABEL: @test_vtst_u64(
1675 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1676 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1677 // CHECK:   [[TMP2:%.*]] = and <1 x i64> %a, %b
1678 // CHECK:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1679 // CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1680 // CHECK:   ret <1 x i64> [[VTST_I]]
test_vtst_u64(uint64x1_t a,uint64x1_t b)1681 uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
1682   return vtst_u64(a, b);
1683 }
1684 
// NOTE(review): CodeGen test for the NEON vceq* (compare equal) intrinsics.
// The CHECK lines verify lowering to plain IR: `icmp eq` (integer/poly) or
// `fcmp oeq` (float/double), then `sext` of the i1 mask to the element
// width. Code and CHECK lines must stay in sync with clang's emitted IR.
1685 // CHECK-LABEL: @test_vceq_s8(
1686 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1687 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1688 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vceq_s8(int8x8_t v1,int8x8_t v2)1689 uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
1690   return vceq_s8(v1, v2);
1691 }
1692 
1693 // CHECK-LABEL: @test_vceq_s16(
1694 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1695 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1696 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vceq_s16(int16x4_t v1,int16x4_t v2)1697 uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
1698   return vceq_s16(v1, v2);
1699 }
1700 
1701 // CHECK-LABEL: @test_vceq_s32(
1702 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1703 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1704 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vceq_s32(int32x2_t v1,int32x2_t v2)1705 uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
1706   return vceq_s32(v1, v2);
1707 }
1708 
1709 // CHECK-LABEL: @test_vceq_s64(
1710 // CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1711 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1712 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vceq_s64(int64x1_t a,int64x1_t b)1713 uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
1714   return vceq_s64(a, b);
1715 }
1716 
1717 // CHECK-LABEL: @test_vceq_u64(
1718 // CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1719 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1720 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vceq_u64(uint64x1_t a,uint64x1_t b)1721 uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
1722   return vceq_u64(a, b);
1723 }
1724 
1725 // CHECK-LABEL: @test_vceq_f32(
1726 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
1727 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1728 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vceq_f32(float32x2_t v1,float32x2_t v2)1729 uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
1730   return vceq_f32(v1, v2);
1731 }
1732 
1733 // CHECK-LABEL: @test_vceq_f64(
1734 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
1735 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1736 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vceq_f64(float64x1_t a,float64x1_t b)1737 uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
1738   return vceq_f64(a, b);
1739 }
1740 
1741 // CHECK-LABEL: @test_vceq_u8(
1742 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1743 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1744 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vceq_u8(uint8x8_t v1,uint8x8_t v2)1745 uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
1746   return vceq_u8(v1, v2);
1747 }
1748 
1749 // CHECK-LABEL: @test_vceq_u16(
1750 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1751 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1752 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vceq_u16(uint16x4_t v1,uint16x4_t v2)1753 uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
1754   return vceq_u16(v1, v2);
1755 }
1756 
1757 // CHECK-LABEL: @test_vceq_u32(
1758 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1759 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1760 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vceq_u32(uint32x2_t v1,uint32x2_t v2)1761 uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
1762   return vceq_u32(v1, v2);
1763 }
1764 
1765 // CHECK-LABEL: @test_vceq_p8(
1766 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1767 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1768 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vceq_p8(poly8x8_t v1,poly8x8_t v2)1769 uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
1770   return vceq_p8(v1, v2);
1771 }
1772 
1773 // CHECK-LABEL: @test_vceqq_s8(
1774 // CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1775 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1776 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vceqq_s8(int8x16_t v1,int8x16_t v2)1777 uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
1778   return vceqq_s8(v1, v2);
1779 }
1780 
1781 // CHECK-LABEL: @test_vceqq_s16(
1782 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1783 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1784 // CHECK:   ret <8 x i16> [[SEXT_I]]
test_vceqq_s16(int16x8_t v1,int16x8_t v2)1785 uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
1786   return vceqq_s16(v1, v2);
1787 }
1788 
1789 // CHECK-LABEL: @test_vceqq_s32(
1790 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1791 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1792 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vceqq_s32(int32x4_t v1,int32x4_t v2)1793 uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
1794   return vceqq_s32(v1, v2);
1795 }
1796 
1797 // CHECK-LABEL: @test_vceqq_f32(
1798 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
1799 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1800 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vceqq_f32(float32x4_t v1,float32x4_t v2)1801 uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
1802   return vceqq_f32(v1, v2);
1803 }
1804 
1805 // CHECK-LABEL: @test_vceqq_u8(
1806 // CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1807 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1808 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vceqq_u8(uint8x16_t v1,uint8x16_t v2)1809 uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
1810   return vceqq_u8(v1, v2);
1811 }
1812 
1813 // CHECK-LABEL: @test_vceqq_u16(
1814 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1815 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1816 // CHECK:   ret <8 x i16> [[SEXT_I]]
test_vceqq_u16(uint16x8_t v1,uint16x8_t v2)1817 uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
1818   return vceqq_u16(v1, v2);
1819 }
1820 
1821 // CHECK-LABEL: @test_vceqq_u32(
1822 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1823 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1824 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vceqq_u32(uint32x4_t v1,uint32x4_t v2)1825 uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
1826   return vceqq_u32(v1, v2);
1827 }
1828 
1829 // CHECK-LABEL: @test_vceqq_p8(
1830 // CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1831 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1832 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vceqq_p8(poly8x16_t v1,poly8x16_t v2)1833 uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
1834   return vceqq_p8(v1, v2);
1835 }
1836 
1837 // CHECK-LABEL: @test_vceqq_s64(
1838 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1839 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1840 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vceqq_s64(int64x2_t v1,int64x2_t v2)1841 uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
1842   return vceqq_s64(v1, v2);
1843 }
1844 
1845 // CHECK-LABEL: @test_vceqq_u64(
1846 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1847 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1848 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vceqq_u64(uint64x2_t v1,uint64x2_t v2)1849 uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
1850   return vceqq_u64(v1, v2);
1851 }
1852 
1853 // CHECK-LABEL: @test_vceqq_f64(
1854 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
1855 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1856 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vceqq_f64(float64x2_t v1,float64x2_t v2)1857 uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
1858   return vceqq_f64(v1, v2);
1859 }
1860 
// NOTE(review): CodeGen test for the NEON vcge* (compare greater-or-equal)
// intrinsics. The CHECK lines verify the signedness-correct lowering:
// `icmp sge` for signed elements, `icmp uge` for unsigned, `fcmp oge` for
// floating point, each followed by `sext` of the i1 mask to the element
// width. Code and CHECK lines must stay in sync with clang's emitted IR.
1861 // CHECK-LABEL: @test_vcge_s8(
1862 // CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
1863 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1864 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vcge_s8(int8x8_t v1,int8x8_t v2)1865 uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
1866   return vcge_s8(v1, v2);
1867 }
1868 
1869 // CHECK-LABEL: @test_vcge_s16(
1870 // CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
1871 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1872 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vcge_s16(int16x4_t v1,int16x4_t v2)1873 uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
1874   return vcge_s16(v1, v2);
1875 }
1876 
1877 // CHECK-LABEL: @test_vcge_s32(
1878 // CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
1879 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1880 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcge_s32(int32x2_t v1,int32x2_t v2)1881 uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
1882   return vcge_s32(v1, v2);
1883 }
1884 
1885 // CHECK-LABEL: @test_vcge_s64(
1886 // CHECK:   [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
1887 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1888 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcge_s64(int64x1_t a,int64x1_t b)1889 uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
1890   return vcge_s64(a, b);
1891 }
1892 
1893 // CHECK-LABEL: @test_vcge_u64(
1894 // CHECK:   [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
1895 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1896 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcge_u64(uint64x1_t a,uint64x1_t b)1897 uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
1898   return vcge_u64(a, b);
1899 }
1900 
1901 // CHECK-LABEL: @test_vcge_f32(
1902 // CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
1903 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1904 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcge_f32(float32x2_t v1,float32x2_t v2)1905 uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
1906   return vcge_f32(v1, v2);
1907 }
1908 
1909 // CHECK-LABEL: @test_vcge_f64(
1910 // CHECK:   [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
1911 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1912 // CHECK:   ret <1 x i64> [[SEXT_I]]
test_vcge_f64(float64x1_t a,float64x1_t b)1913 uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
1914   return vcge_f64(a, b);
1915 }
1916 
1917 // CHECK-LABEL: @test_vcge_u8(
1918 // CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
1919 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1920 // CHECK:   ret <8 x i8> [[SEXT_I]]
test_vcge_u8(uint8x8_t v1,uint8x8_t v2)1921 uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
1922   return vcge_u8(v1, v2);
1923 }
1924 
1925 // CHECK-LABEL: @test_vcge_u16(
1926 // CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
1927 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1928 // CHECK:   ret <4 x i16> [[SEXT_I]]
test_vcge_u16(uint16x4_t v1,uint16x4_t v2)1929 uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
1930   return vcge_u16(v1, v2);
1931 }
1932 
1933 // CHECK-LABEL: @test_vcge_u32(
1934 // CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
1935 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1936 // CHECK:   ret <2 x i32> [[SEXT_I]]
test_vcge_u32(uint32x2_t v1,uint32x2_t v2)1937 uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
1938   return vcge_u32(v1, v2);
1939 }
1940 
1941 // CHECK-LABEL: @test_vcgeq_s8(
1942 // CHECK:   [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
1943 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1944 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vcgeq_s8(int8x16_t v1,int8x16_t v2)1945 uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
1946   return vcgeq_s8(v1, v2);
1947 }
1948 
1949 // CHECK-LABEL: @test_vcgeq_s16(
1950 // CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
1951 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1952 // CHECK:   ret <8 x i16> [[SEXT_I]]
test_vcgeq_s16(int16x8_t v1,int16x8_t v2)1953 uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
1954   return vcgeq_s16(v1, v2);
1955 }
1956 
1957 // CHECK-LABEL: @test_vcgeq_s32(
1958 // CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
1959 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1960 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vcgeq_s32(int32x4_t v1,int32x4_t v2)1961 uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
1962   return vcgeq_s32(v1, v2);
1963 }
1964 
1965 // CHECK-LABEL: @test_vcgeq_f32(
1966 // CHECK:   [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
1967 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1968 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vcgeq_f32(float32x4_t v1,float32x4_t v2)1969 uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
1970   return vcgeq_f32(v1, v2);
1971 }
1972 
1973 // CHECK-LABEL: @test_vcgeq_u8(
1974 // CHECK:   [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
1975 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1976 // CHECK:   ret <16 x i8> [[SEXT_I]]
test_vcgeq_u8(uint8x16_t v1,uint8x16_t v2)1977 uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
1978   return vcgeq_u8(v1, v2);
1979 }
1980 
1981 // CHECK-LABEL: @test_vcgeq_u16(
1982 // CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
1983 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1984 // CHECK:   ret <8 x i16> [[SEXT_I]]
test_vcgeq_u16(uint16x8_t v1,uint16x8_t v2)1985 uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
1986   return vcgeq_u16(v1, v2);
1987 }
1988 
1989 // CHECK-LABEL: @test_vcgeq_u32(
1990 // CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
1991 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1992 // CHECK:   ret <4 x i32> [[SEXT_I]]
test_vcgeq_u32(uint32x4_t v1,uint32x4_t v2)1993 uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
1994   return vcgeq_u32(v1, v2);
1995 }
1996 
1997 // CHECK-LABEL: @test_vcgeq_s64(
1998 // CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
1999 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2000 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vcgeq_s64(int64x2_t v1,int64x2_t v2)2001 uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
2002   return vcgeq_s64(v1, v2);
2003 }
2004 
2005 // CHECK-LABEL: @test_vcgeq_u64(
2006 // CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
2007 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2008 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vcgeq_u64(uint64x2_t v1,uint64x2_t v2)2009 uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
2010   return vcgeq_u64(v1, v2);
2011 }
2012 
2013 // CHECK-LABEL: @test_vcgeq_f64(
2014 // CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
2015 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2016 // CHECK:   ret <2 x i64> [[SEXT_I]]
test_vcgeq_f64(float64x2_t v1,float64x2_t v2)2017 uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
2018   return vcgeq_f64(v1, v2);
2019 }
2020 
// CHECK-LABEL: @test_vcle_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
// Notes about vcle:
// These tests now verify the IR compare directly (icmp sle / fcmp ole with the
// operands in source order). The older note about LE being implemented as GE
// with reversed operands described assembly-level checks and no longer applies
// to what is matched here.
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: @test_vcle_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: @test_vcle_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: @test_vcle_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}
2183 
// vcgt/vcgtq: lane-wise "greater than" compares. As with vcge, each test pins
// the emitted IR: an icmp/fcmp in source operand order plus the mask sign-extend.
// CHECK-LABEL: @test_vcgt_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}

// CHECK-LABEL: @test_vcgt_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}

// CHECK-LABEL: @test_vcgt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgt_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}

// CHECK-LABEL: @test_vcgt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}
2343 
// CHECK-LABEL: @test_vclt_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// These tests now verify the IR compare directly (icmp slt / fcmp olt with the
// operands in source order). The older note about LT being implemented as GT
// with reversed operands described assembly-level checks and no longer applies
// to what is matched here.
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: @test_vclt_s64(
// CHECK:   [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: @test_vclt_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: @test_vclt_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}
2506 
// vhadd/vhaddq and vhsub: halving add/subtract. Unlike the compare intrinsics
// above, these lower to target intrinsic calls (llvm.aarch64.neon.{s,u}hadd /
// {s,u}hsub). For the 16/32-bit element widths the CHECK patterns also record
// the bitcasts clang emits around the call; the [[TMP*]]/[[*_V3_I]] values are
// not otherwise used by the matched 'ret'.
// CHECK-LABEL: @test_vhadd_s8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}
2668 
// Halving subtract, 128-bit (q-register) variants: each vhsubq_* call must
// lower to the corresponding llvm.aarch64.neon.{s,u}hsub.* intrinsic.
// CHECK-LABEL: @test_vhsubq_s8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}
2722 
// Rounding halving add, 64-bit (d-register) variants: each vrhadd_* call must
// lower to the corresponding llvm.aarch64.neon.{s,u}rhadd.* intrinsic.
// CHECK-LABEL: @test_vrhadd_s8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vrhadd_u32(v1, v2);
}
2776 
// Rounding halving add, 128-bit (q-register) variants: each vrhaddq_* call
// must lower to the corresponding llvm.aarch64.neon.{s,u}rhadd.* intrinsic.
// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}
2830 
// Saturating add, 64-bit (d-register) variants: each vqadd_* call must lower
// to the corresponding llvm.aarch64.neon.{s,u}qadd.* intrinsic.
// CHECK-LABEL: @test_vqadd_s8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}
2904 
// Saturating add, 128-bit (q-register) variants: each vqaddq_* call must
// lower to the corresponding llvm.aarch64.neon.{s,u}qadd.* intrinsic.
// CHECK-LABEL: @test_vqaddq_s8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
2978 
// Saturating subtract, 64-bit (d-register) variants: each vqsub_* call must
// lower to the corresponding llvm.aarch64.neon.{s,u}qsub.* intrinsic.
// CHECK-LABEL: @test_vqsub_s8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}
3052 
// Saturating subtract, 128-bit (q-register) variants: each vqsubq_* call must
// lower to the corresponding llvm.aarch64.neon.{s,u}qsub.* intrinsic.
// CHECK-LABEL: @test_vqsubq_s8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}
3126 
// Vector shift left, 64-bit (d-register) variants: each vshl_* call must
// lower to the corresponding llvm.aarch64.neon.{s,u}shl.* intrinsic.
// Note: the unsigned variants still take a *signed* shift-amount vector (b).
// CHECK-LABEL: @test_vshl_s8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}
3200 
// Vector shift left, 128-bit (q-register) variants: each vshlq_* call must
// lower to the corresponding llvm.aarch64.neon.{s,u}shl.* intrinsic.
// Note: the unsigned variants still take a *signed* shift-amount vector (b).
// CHECK-LABEL: @test_vshlq_s8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}
3254 
3255 // CHECK-LABEL: @test_vshlq_u32(
3256 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3257 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3258 // CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
3259 // CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3260 // CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
test_vshlq_u32(uint32x4_t a,int32x4_t b)3261 uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
3262   return vshlq_u32(a, b);
3263 }
3264 
3265 // CHECK-LABEL: @test_vshlq_u64(
3266 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3267 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3268 // CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
3269 // CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3270 // CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
test_vshlq_u64(uint64x2_t a,int64x2_t b)3271 uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
3272   return vshlq_u64(a, b);
3273 }
3274 
// vqshl tests (64-bit vectors): saturating variable shift left, lowered to
// llvm.aarch64.neon.{s,u}qshl per the CHECK lines below.
// CHECK-LABEL: @test_vqshl_s8(
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: @test_vqshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: @test_vqshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: @test_vqshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: @test_vqshl_u8(
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: @test_vqshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: @test_vqshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: @test_vqshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}
3348 
// vqshlq tests (128-bit vectors): saturating variable shift left, lowered to
// llvm.aarch64.neon.{s,u}qshl per the CHECK lines below.
// CHECK-LABEL: @test_vqshlq_s8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
3422 
// vrshl tests (64-bit vectors): rounding variable shift left, lowered to
// llvm.aarch64.neon.{s,u}rshl per the CHECK lines below.
// CHECK-LABEL: @test_vrshl_s8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}
3496 
// vrshlq tests (128-bit vectors): rounding variable shift left, lowered to
// llvm.aarch64.neon.{s,u}rshl per the CHECK lines below.
// CHECK-LABEL: @test_vrshlq_s8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}
3570 
// vqrshl tests (64-bit vectors): saturating rounding variable shift left,
// lowered to llvm.aarch64.neon.{s,u}qrshl per the CHECK lines below.
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}
3644 
// vqrshlq tests (128-bit vectors): saturating rounding variable shift left,
// lowered to llvm.aarch64.neon.{s,u}qrshl per the CHECK lines below.
// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
3718 
// vsli_n on poly64: shift-left-and-insert with an immediate shift count
// (here 0), lowered to llvm.aarch64.neon.vsli per the CHECK lines below.
// CHECK-LABEL: @test_vsli_n_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: @test_vsliq_n_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}
3740 
// vmax tests (64-bit vectors): per-element maximum, lowered to
// llvm.aarch64.neon.{s,u}max (integer) / fmax (float) per the CHECK lines.
// CHECK-LABEL: @test_vmax_s8(
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
3799 
// vmaxq tests (128-bit vectors): per-element maximum, lowered to
// llvm.aarch64.neon.{s,u}max (integer) / fmax (float) per the CHECK lines.
// CHECK-LABEL: @test_vmaxq_s8(
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}
3858 
3859 // CHECK-LABEL: @test_vmaxq_f64(
3860 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
3861 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
3862 // CHECK:   [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
3863 // CHECK:   ret <2 x double> [[VMAX2_I]]
test_vmaxq_f64(float64x2_t a,float64x2_t b)3864 float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
3865   return vmaxq_f64(a, b);
3866 }
3867 
// vmin/vminq: verify Clang lowers the 64-bit (vmin_*) and 128-bit (vminq_*)
// element-wise minimum intrinsics to the matching @llvm.aarch64.neon.{s,u,f}min
// IR intrinsics. The // CHECK lines below are FileCheck directives and must
// match the emitted IR exactly; do not edit them by hand.
// CHECK-LABEL: @test_vmin_s8(
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: @test_vmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: @test_vmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: @test_vmin_u8(
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: @test_vmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: @test_vminq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}
3994 
// vmaxnm/vminnm: verify the IEEE-754 maxNum/minNum intrinsics (NaN-propagating
// variants of fmax/fmin) lower to @llvm.aarch64.neon.fmaxnm / .fminnm.
// The // CHECK lines are FileCheck directives; do not edit them by hand.
// CHECK-LABEL: @test_vmaxnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
4048 
// vpmax/vpmaxq: verify the pairwise-maximum intrinsics lower to
// @llvm.aarch64.neon.{s,u,f}maxp. The // CHECK lines are FileCheck
// directives; do not edit them by hand.
// CHECK-LABEL: @test_vpmax_s8(
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s8(
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u8(
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}
4175 
// vpmin/vpminq: verify the pairwise-minimum intrinsics lower to
// @llvm.aarch64.neon.{s,u,f}minp. The // CHECK lines are FileCheck
// directives; do not edit them by hand.
// CHECK-LABEL: @test_vpmin_s8(
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_s8(
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}

// CHECK-LABEL: @test_vpminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}

// CHECK-LABEL: @test_vpminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}

// CHECK-LABEL: @test_vpminq_u8(
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}

// CHECK-LABEL: @test_vpminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}

// CHECK-LABEL: @test_vpminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}

// CHECK-LABEL: @test_vpminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}
4302 
// vpmaxnm/vpminnm: verify the pairwise maxNum/minNum (NaN-propagating pairwise)
// intrinsics lower to @llvm.aarch64.neon.fmaxnmp / .fminnmp.
// The // CHECK lines are FileCheck directives; do not edit them by hand.
// CHECK-LABEL: @test_vpmaxnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vpminnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}
4356 
// vpadd: verify the 64-bit pairwise-add intrinsics lower to
// @llvm.aarch64.neon.addp (integer) / .faddp (float). The multi-element
// variants also check the result bitcast emitted by the builtin lowering.
// The // CHECK lines are FileCheck directives; do not edit them by hand.
// CHECK-LABEL: @test_vpadd_s8(
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}
4420 
// vpaddq: verify the 128-bit pairwise-add intrinsics lower to
// @llvm.aarch64.neon.addp on the full-width vector types.
// The // CHECK lines are FileCheck directives; do not edit them by hand.
// CHECK-LABEL: @test_vpaddq_s8(
// CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
  return vpaddq_s8(a, b);
}

// CHECK-LABEL: @test_vpaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VPADDQ_V2_I]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  return vpaddq_s16(a, b);
}

// CHECK-LABEL: @test_vpaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VPADDQ_V2_I]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  return vpaddq_s32(a, b);
}

// CHECK-LABEL: @test_vpaddq_u8(
// CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vpaddq_u8(a, b);
}

// CHECK-LABEL: @test_vpaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VPADDQ_V2_I]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vpaddq_u16(a, b);
}
4464 
4465 // CHECK-LABEL: @test_vpaddq_u32(
4466 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4467 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4468 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
4469 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
4470 // CHECK:   ret <4 x i32> [[VPADDQ_V2_I]]
test_vpaddq_u32(uint32x4_t a,uint32x4_t b)4471 uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
4472   return vpaddq_u32(a, b);
4473 }
4474 
4475 // CHECK-LABEL: @test_vpaddq_f32(
4476 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4477 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4478 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
4479 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
4480 // CHECK:   ret <4 x float> [[VPADDQ_V2_I]]
test_vpaddq_f32(float32x4_t a,float32x4_t b)4481 float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
4482   return vpaddq_f32(a, b);
4483 }
4484 
4485 // CHECK-LABEL: @test_vpaddq_f64(
4486 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4487 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4488 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
4489 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
4490 // CHECK:   ret <2 x double> [[VPADDQ_V2_I]]
test_vpaddq_f64(float64x2_t a,float64x2_t b)4491 float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
4492   return vpaddq_f64(a, b);
4493 }
4494 
// ---- vqdmulh_* (saturating doubling multiply high half) ------------------
// NOTE(review): generated FileCheck test; CHECK comments are test directives
// pinned to clang's IR for llvm.aarch64.neon.sqdmulh — keep byte-identical.
4495 // CHECK-LABEL: @test_vqdmulh_s16(
4496 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4497 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4498 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
4499 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
4500 // CHECK:   ret <4 x i16> [[VQDMULH_V2_I]]
test_vqdmulh_s16(int16x4_t a,int16x4_t b)4501 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
4502   return vqdmulh_s16(a, b);
4503 }
4504 
4505 // CHECK-LABEL: @test_vqdmulh_s32(
4506 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4507 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4508 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
4509 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
4510 // CHECK:   ret <2 x i32> [[VQDMULH_V2_I]]
test_vqdmulh_s32(int32x2_t a,int32x2_t b)4511 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
4512   return vqdmulh_s32(a, b);
4513 }
4514 
4515 // CHECK-LABEL: @test_vqdmulhq_s16(
4516 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4517 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4518 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
4519 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
4520 // CHECK:   ret <8 x i16> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s16(int16x8_t a,int16x8_t b)4521 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
4522   return vqdmulhq_s16(a, b);
4523 }
4524 
4525 // CHECK-LABEL: @test_vqdmulhq_s32(
4526 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4527 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4528 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
4529 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
4530 // CHECK:   ret <4 x i32> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s32(int32x4_t a,int32x4_t b)4531 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
4532   return vqdmulhq_s32(a, b);
4533 }
4534 
// ---- vqrdmulh_* (saturating rounding doubling multiply high half) --------
// NOTE(review): generated FileCheck test; CHECK comments are test directives
// pinned to clang's IR for llvm.aarch64.neon.sqrdmulh — keep byte-identical.
4535 // CHECK-LABEL: @test_vqrdmulh_s16(
4536 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4537 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4538 // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
4539 // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
4540 // CHECK:   ret <4 x i16> [[VQRDMULH_V2_I]]
test_vqrdmulh_s16(int16x4_t a,int16x4_t b)4541 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
4542   return vqrdmulh_s16(a, b);
4543 }
4544 
4545 // CHECK-LABEL: @test_vqrdmulh_s32(
4546 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4547 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4548 // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
4549 // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
4550 // CHECK:   ret <2 x i32> [[VQRDMULH_V2_I]]
test_vqrdmulh_s32(int32x2_t a,int32x2_t b)4551 int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
4552   return vqrdmulh_s32(a, b);
4553 }
4554 
4555 // CHECK-LABEL: @test_vqrdmulhq_s16(
4556 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4557 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4558 // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
4559 // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
4560 // CHECK:   ret <8 x i16> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s16(int16x8_t a,int16x8_t b)4561 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
4562   return vqrdmulhq_s16(a, b);
4563 }
4564 
4565 // CHECK-LABEL: @test_vqrdmulhq_s32(
4566 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4567 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4568 // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
4569 // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
4570 // CHECK:   ret <4 x i32> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s32(int32x4_t a,int32x4_t b)4571 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
4572   return vqrdmulhq_s32(a, b);
4573 }
4574 
// ---- vmulx_* (floating-point multiply extended) --------------------------
// NOTE(review): generated FileCheck test; CHECK comments are test directives
// pinned to clang's IR for llvm.aarch64.neon.fmulx — keep byte-identical.
4575 // CHECK-LABEL: @test_vmulx_f32(
4576 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4577 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4578 // CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
4579 // CHECK:   ret <2 x float> [[VMULX2_I]]
test_vmulx_f32(float32x2_t a,float32x2_t b)4580 float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
4581   return vmulx_f32(a, b);
4582 }
4583 
4584 // CHECK-LABEL: @test_vmulxq_f32(
4585 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4586 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4587 // CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
4588 // CHECK:   ret <4 x float> [[VMULX2_I]]
test_vmulxq_f32(float32x4_t a,float32x4_t b)4589 float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
4590   return vmulxq_f32(a, b);
4591 }
4592 
4593 // CHECK-LABEL: @test_vmulxq_f64(
4594 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4595 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4596 // CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
4597 // CHECK:   ret <2 x double> [[VMULX2_I]]
test_vmulxq_f64(float64x2_t a,float64x2_t b)4598 float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
4599   return vmulxq_f64(a, b);
4600 }
4601 
// ---- vshl_n_* / vshlq_n_* (shift left by immediate) ----------------------
// NOTE(review): generated FileCheck test; CHECK comments are test directives.
// Immediate shifts lower to plain IR `shl` with a splat constant; signed and
// unsigned variants emit identical IR. Keep code and CHECK text byte-identical.
4602 // CHECK-LABEL: @test_vshl_n_s8(
4603 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4604 // CHECK:   ret <8 x i8> [[VSHL_N]]
test_vshl_n_s8(int8x8_t a)4605 int8x8_t test_vshl_n_s8(int8x8_t a) {
4606   return vshl_n_s8(a, 3);
4607 }
4608 
4609 // CHECK-LABEL: @test_vshl_n_s16(
4610 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4611 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4612 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4613 // CHECK:   ret <4 x i16> [[VSHL_N]]
test_vshl_n_s16(int16x4_t a)4614 int16x4_t test_vshl_n_s16(int16x4_t a) {
4615   return vshl_n_s16(a, 3);
4616 }
4617 
4618 // CHECK-LABEL: @test_vshl_n_s32(
4619 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4620 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4621 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4622 // CHECK:   ret <2 x i32> [[VSHL_N]]
test_vshl_n_s32(int32x2_t a)4623 int32x2_t test_vshl_n_s32(int32x2_t a) {
4624   return vshl_n_s32(a, 3);
4625 }
4626 
4627 // CHECK-LABEL: @test_vshlq_n_s8(
4628 // CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4629 // CHECK:   ret <16 x i8> [[VSHL_N]]
test_vshlq_n_s8(int8x16_t a)4630 int8x16_t test_vshlq_n_s8(int8x16_t a) {
4631   return vshlq_n_s8(a, 3);
4632 }
4633 
4634 // CHECK-LABEL: @test_vshlq_n_s16(
4635 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4636 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4637 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4638 // CHECK:   ret <8 x i16> [[VSHL_N]]
test_vshlq_n_s16(int16x8_t a)4639 int16x8_t test_vshlq_n_s16(int16x8_t a) {
4640   return vshlq_n_s16(a, 3);
4641 }
4642 
4643 // CHECK-LABEL: @test_vshlq_n_s32(
4644 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4645 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4646 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4647 // CHECK:   ret <4 x i32> [[VSHL_N]]
test_vshlq_n_s32(int32x4_t a)4648 int32x4_t test_vshlq_n_s32(int32x4_t a) {
4649   return vshlq_n_s32(a, 3);
4650 }
4651 
4652 // CHECK-LABEL: @test_vshlq_n_s64(
4653 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4654 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4655 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4656 // CHECK:   ret <2 x i64> [[VSHL_N]]
test_vshlq_n_s64(int64x2_t a)4657 int64x2_t test_vshlq_n_s64(int64x2_t a) {
4658   return vshlq_n_s64(a, 3);
4659 }
4660 
4661 // CHECK-LABEL: @test_vshl_n_u8(
4662 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4663 // CHECK:   ret <8 x i8> [[VSHL_N]]
test_vshl_n_u8(uint8x8_t a)4664 uint8x8_t test_vshl_n_u8(uint8x8_t a) {
4665   return vshl_n_u8(a, 3);
4666 }
4667 
4668 // CHECK-LABEL: @test_vshl_n_u16(
4669 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4670 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4671 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4672 // CHECK:   ret <4 x i16> [[VSHL_N]]
test_vshl_n_u16(uint16x4_t a)4673 uint16x4_t test_vshl_n_u16(uint16x4_t a) {
4674   return vshl_n_u16(a, 3);
4675 }
4676 
4677 // CHECK-LABEL: @test_vshl_n_u32(
4678 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4679 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4680 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4681 // CHECK:   ret <2 x i32> [[VSHL_N]]
test_vshl_n_u32(uint32x2_t a)4682 uint32x2_t test_vshl_n_u32(uint32x2_t a) {
4683   return vshl_n_u32(a, 3);
4684 }
4685 
4686 // CHECK-LABEL: @test_vshlq_n_u8(
4687 // CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4688 // CHECK:   ret <16 x i8> [[VSHL_N]]
test_vshlq_n_u8(uint8x16_t a)4689 uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
4690   return vshlq_n_u8(a, 3);
4691 }
4692 
4693 // CHECK-LABEL: @test_vshlq_n_u16(
4694 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4695 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4696 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4697 // CHECK:   ret <8 x i16> [[VSHL_N]]
test_vshlq_n_u16(uint16x8_t a)4698 uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
4699   return vshlq_n_u16(a, 3);
4700 }
4701 
4702 // CHECK-LABEL: @test_vshlq_n_u32(
4703 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4704 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4705 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4706 // CHECK:   ret <4 x i32> [[VSHL_N]]
test_vshlq_n_u32(uint32x4_t a)4707 uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
4708   return vshlq_n_u32(a, 3);
4709 }
4710 
4711 // CHECK-LABEL: @test_vshlq_n_u64(
4712 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4713 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4714 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4715 // CHECK:   ret <2 x i64> [[VSHL_N]]
test_vshlq_n_u64(uint64x2_t a)4716 uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
4717   return vshlq_n_u64(a, 3);
4718 }
4719 
// ---- vshr_n_* / vshrq_n_* (shift right by immediate) ---------------------
// NOTE(review): generated FileCheck test; CHECK comments are test directives.
// Signed variants lower to `ashr`, unsigned to `lshr`, each with a splat
// immediate. Keep code and CHECK text byte-identical.
4720 // CHECK-LABEL: @test_vshr_n_s8(
4721 // CHECK:   [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4722 // CHECK:   ret <8 x i8> [[VSHR_N]]
test_vshr_n_s8(int8x8_t a)4723 int8x8_t test_vshr_n_s8(int8x8_t a) {
4724   return vshr_n_s8(a, 3);
4725 }
4726 
4727 // CHECK-LABEL: @test_vshr_n_s16(
4728 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4729 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4730 // CHECK:   [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4731 // CHECK:   ret <4 x i16> [[VSHR_N]]
test_vshr_n_s16(int16x4_t a)4732 int16x4_t test_vshr_n_s16(int16x4_t a) {
4733   return vshr_n_s16(a, 3);
4734 }
4735 
4736 // CHECK-LABEL: @test_vshr_n_s32(
4737 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4738 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4739 // CHECK:   [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
4740 // CHECK:   ret <2 x i32> [[VSHR_N]]
test_vshr_n_s32(int32x2_t a)4741 int32x2_t test_vshr_n_s32(int32x2_t a) {
4742   return vshr_n_s32(a, 3);
4743 }
4744 
4745 // CHECK-LABEL: @test_vshrq_n_s8(
4746 // CHECK:   [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4747 // CHECK:   ret <16 x i8> [[VSHR_N]]
test_vshrq_n_s8(int8x16_t a)4748 int8x16_t test_vshrq_n_s8(int8x16_t a) {
4749   return vshrq_n_s8(a, 3);
4750 }
4751 
4752 // CHECK-LABEL: @test_vshrq_n_s16(
4753 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4754 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4755 // CHECK:   [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4756 // CHECK:   ret <8 x i16> [[VSHR_N]]
test_vshrq_n_s16(int16x8_t a)4757 int16x8_t test_vshrq_n_s16(int16x8_t a) {
4758   return vshrq_n_s16(a, 3);
4759 }
4760 
4761 // CHECK-LABEL: @test_vshrq_n_s32(
4762 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4763 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4764 // CHECK:   [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4765 // CHECK:   ret <4 x i32> [[VSHR_N]]
test_vshrq_n_s32(int32x4_t a)4766 int32x4_t test_vshrq_n_s32(int32x4_t a) {
4767   return vshrq_n_s32(a, 3);
4768 }
4769 
4770 // CHECK-LABEL: @test_vshrq_n_s64(
4771 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4772 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4773 // CHECK:   [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
4774 // CHECK:   ret <2 x i64> [[VSHR_N]]
test_vshrq_n_s64(int64x2_t a)4775 int64x2_t test_vshrq_n_s64(int64x2_t a) {
4776   return vshrq_n_s64(a, 3);
4777 }
4778 
4779 // CHECK-LABEL: @test_vshr_n_u8(
4780 // CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4781 // CHECK:   ret <8 x i8> [[VSHR_N]]
test_vshr_n_u8(uint8x8_t a)4782 uint8x8_t test_vshr_n_u8(uint8x8_t a) {
4783   return vshr_n_u8(a, 3);
4784 }
4785 
4786 // CHECK-LABEL: @test_vshr_n_u16(
4787 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4788 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4789 // CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4790 // CHECK:   ret <4 x i16> [[VSHR_N]]
test_vshr_n_u16(uint16x4_t a)4791 uint16x4_t test_vshr_n_u16(uint16x4_t a) {
4792   return vshr_n_u16(a, 3);
4793 }
4794 
4795 // CHECK-LABEL: @test_vshr_n_u32(
4796 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4797 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4798 // CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
4799 // CHECK:   ret <2 x i32> [[VSHR_N]]
test_vshr_n_u32(uint32x2_t a)4800 uint32x2_t test_vshr_n_u32(uint32x2_t a) {
4801   return vshr_n_u32(a, 3);
4802 }
4803 
4804 // CHECK-LABEL: @test_vshrq_n_u8(
4805 // CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4806 // CHECK:   ret <16 x i8> [[VSHR_N]]
test_vshrq_n_u8(uint8x16_t a)4807 uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
4808   return vshrq_n_u8(a, 3);
4809 }
4810 
4811 // CHECK-LABEL: @test_vshrq_n_u16(
4812 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4813 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4814 // CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4815 // CHECK:   ret <8 x i16> [[VSHR_N]]
test_vshrq_n_u16(uint16x8_t a)4816 uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
4817   return vshrq_n_u16(a, 3);
4818 }
4819 
4820 // CHECK-LABEL: @test_vshrq_n_u32(
4821 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4822 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4823 // CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4824 // CHECK:   ret <4 x i32> [[VSHR_N]]
test_vshrq_n_u32(uint32x4_t a)4825 uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
4826   return vshrq_n_u32(a, 3);
4827 }
4828 
4829 // CHECK-LABEL: @test_vshrq_n_u64(
4830 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4831 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4832 // CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
4833 // CHECK:   ret <2 x i64> [[VSHR_N]]
test_vshrq_n_u64(uint64x2_t a)4834 uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
4835   return vshrq_n_u64(a, 3);
4836 }
4837 
// ---- vsra_n_* / vsraq_n_* (shift right and accumulate) -------------------
// NOTE(review): generated FileCheck test; CHECK comments are test directives.
// Lowers to ashr (signed) / lshr (unsigned) of %b followed by an `add` with
// the accumulator %a. Keep code and CHECK text byte-identical.
4838 // CHECK-LABEL: @test_vsra_n_s8(
4839 // CHECK:   [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4840 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4841 // CHECK:   ret <8 x i8> [[TMP0]]
test_vsra_n_s8(int8x8_t a,int8x8_t b)4842 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
4843   return vsra_n_s8(a, b, 3);
4844 }
4845 
4846 // CHECK-LABEL: @test_vsra_n_s16(
4847 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4848 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4849 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4850 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4851 // CHECK:   [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4852 // CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4853 // CHECK:   ret <4 x i16> [[TMP4]]
test_vsra_n_s16(int16x4_t a,int16x4_t b)4854 int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
4855   return vsra_n_s16(a, b, 3);
4856 }
4857 
4858 // CHECK-LABEL: @test_vsra_n_s32(
4859 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4860 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4861 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4862 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4863 // CHECK:   [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
4864 // CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4865 // CHECK:   ret <2 x i32> [[TMP4]]
test_vsra_n_s32(int32x2_t a,int32x2_t b)4866 int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
4867   return vsra_n_s32(a, b, 3);
4868 }
4869 
4870 // CHECK-LABEL: @test_vsraq_n_s8(
4871 // CHECK:   [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4872 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4873 // CHECK:   ret <16 x i8> [[TMP0]]
test_vsraq_n_s8(int8x16_t a,int8x16_t b)4874 int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
4875   return vsraq_n_s8(a, b, 3);
4876 }
4877 
4878 // CHECK-LABEL: @test_vsraq_n_s16(
4879 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4880 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4881 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4882 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4883 // CHECK:   [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4884 // CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4885 // CHECK:   ret <8 x i16> [[TMP4]]
test_vsraq_n_s16(int16x8_t a,int16x8_t b)4886 int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
4887   return vsraq_n_s16(a, b, 3);
4888 }
4889 
4890 // CHECK-LABEL: @test_vsraq_n_s32(
4891 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4892 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4893 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4894 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4895 // CHECK:   [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4896 // CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4897 // CHECK:   ret <4 x i32> [[TMP4]]
test_vsraq_n_s32(int32x4_t a,int32x4_t b)4898 int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
4899   return vsraq_n_s32(a, b, 3);
4900 }
4901 
4902 // CHECK-LABEL: @test_vsraq_n_s64(
4903 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4904 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4905 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4906 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4907 // CHECK:   [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
4908 // CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4909 // CHECK:   ret <2 x i64> [[TMP4]]
test_vsraq_n_s64(int64x2_t a,int64x2_t b)4910 int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
4911   return vsraq_n_s64(a, b, 3);
4912 }
4913 
4914 // CHECK-LABEL: @test_vsra_n_u8(
4915 // CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4916 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4917 // CHECK:   ret <8 x i8> [[TMP0]]
test_vsra_n_u8(uint8x8_t a,uint8x8_t b)4918 uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
4919   return vsra_n_u8(a, b, 3);
4920 }
4921 
4922 // CHECK-LABEL: @test_vsra_n_u16(
4923 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4924 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4925 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4926 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4927 // CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4928 // CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4929 // CHECK:   ret <4 x i16> [[TMP4]]
test_vsra_n_u16(uint16x4_t a,uint16x4_t b)4930 uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
4931   return vsra_n_u16(a, b, 3);
4932 }
4933 
4934 // CHECK-LABEL: @test_vsra_n_u32(
4935 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4936 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4937 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4938 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4939 // CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
4940 // CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4941 // CHECK:   ret <2 x i32> [[TMP4]]
test_vsra_n_u32(uint32x2_t a,uint32x2_t b)4942 uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
4943   return vsra_n_u32(a, b, 3);
4944 }
4945 
4946 // CHECK-LABEL: @test_vsraq_n_u8(
4947 // CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4948 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4949 // CHECK:   ret <16 x i8> [[TMP0]]
test_vsraq_n_u8(uint8x16_t a,uint8x16_t b)4950 uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
4951   return vsraq_n_u8(a, b, 3);
4952 }
4953 
4954 // CHECK-LABEL: @test_vsraq_n_u16(
4955 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4956 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4957 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4958 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4959 // CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4960 // CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4961 // CHECK:   ret <8 x i16> [[TMP4]]
test_vsraq_n_u16(uint16x8_t a,uint16x8_t b)4962 uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
4963   return vsraq_n_u16(a, b, 3);
4964 }
4965 
4966 // CHECK-LABEL: @test_vsraq_n_u32(
4967 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4968 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4969 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4970 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4971 // CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4972 // CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4973 // CHECK:   ret <4 x i32> [[TMP4]]
test_vsraq_n_u32(uint32x4_t a,uint32x4_t b)4974 uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
4975   return vsraq_n_u32(a, b, 3);
4976 }
4977 
4978 // CHECK-LABEL: @test_vsraq_n_u64(
4979 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4980 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4981 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4982 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4983 // CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
4984 // CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4985 // CHECK:   ret <2 x i64> [[TMP4]]
test_vsraq_n_u64(uint64x2_t a,uint64x2_t b)4986 uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
4987   return vsraq_n_u64(a, b, 3);
4988 }
4989 
// ---- vrshr_n_* / vrshrq_n_* (rounding shift right by immediate) ----------
// NOTE(review): generated FileCheck test; CHECK comments are test directives.
// Rounding right shifts lower to the srshl/urshl intrinsics with a negated
// (splat -3) shift amount. Keep code and CHECK text byte-identical.
4990 // CHECK-LABEL: @test_vrshr_n_s8(
4991 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
4992 // CHECK:   ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_s8(int8x8_t a)4993 int8x8_t test_vrshr_n_s8(int8x8_t a) {
4994   return vrshr_n_s8(a, 3);
4995 }
4996 
4997 // CHECK-LABEL: @test_vrshr_n_s16(
4998 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4999 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5000 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5001 // CHECK:   ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_s16(int16x4_t a)5002 int16x4_t test_vrshr_n_s16(int16x4_t a) {
5003   return vrshr_n_s16(a, 3);
5004 }
5005 
5006 // CHECK-LABEL: @test_vrshr_n_s32(
5007 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5008 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5009 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5010 // CHECK:   ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_s32(int32x2_t a)5011 int32x2_t test_vrshr_n_s32(int32x2_t a) {
5012   return vrshr_n_s32(a, 3);
5013 }
5014 
5015 // CHECK-LABEL: @test_vrshrq_n_s8(
5016 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5017 // CHECK:   ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_s8(int8x16_t a)5018 int8x16_t test_vrshrq_n_s8(int8x16_t a) {
5019   return vrshrq_n_s8(a, 3);
5020 }
5021 
5022 // CHECK-LABEL: @test_vrshrq_n_s16(
5023 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5024 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5025 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5026 // CHECK:   ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_s16(int16x8_t a)5027 int16x8_t test_vrshrq_n_s16(int16x8_t a) {
5028   return vrshrq_n_s16(a, 3);
5029 }
5030 
5031 // CHECK-LABEL: @test_vrshrq_n_s32(
5032 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5033 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5034 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5035 // CHECK:   ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_s32(int32x4_t a)5036 int32x4_t test_vrshrq_n_s32(int32x4_t a) {
5037   return vrshrq_n_s32(a, 3);
5038 }
5039 
5040 // CHECK-LABEL: @test_vrshrq_n_s64(
5041 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5042 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5043 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5044 // CHECK:   ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_s64(int64x2_t a)5045 int64x2_t test_vrshrq_n_s64(int64x2_t a) {
5046   return vrshrq_n_s64(a, 3);
5047 }
5048 
5049 // CHECK-LABEL: @test_vrshr_n_u8(
5050 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5051 // CHECK:   ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_u8(uint8x8_t a)5052 uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
5053   return vrshr_n_u8(a, 3);
5054 }
5055 
5056 // CHECK-LABEL: @test_vrshr_n_u16(
5057 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5058 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5059 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5060 // CHECK:   ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_u16(uint16x4_t a)5061 uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
5062   return vrshr_n_u16(a, 3);
5063 }
5064 
5065 // CHECK-LABEL: @test_vrshr_n_u32(
5066 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5067 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5068 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5069 // CHECK:   ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_u32(uint32x2_t a)5070 uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
5071   return vrshr_n_u32(a, 3);
5072 }
5073 
5074 // CHECK-LABEL: @test_vrshrq_n_u8(
5075 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5076 // CHECK:   ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_u8(uint8x16_t a)5077 uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
5078   return vrshrq_n_u8(a, 3);
5079 }
5080 
5081 // CHECK-LABEL: @test_vrshrq_n_u16(
5082 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5083 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5084 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5085 // CHECK:   ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_u16(uint16x8_t a)5086 uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
5087   return vrshrq_n_u16(a, 3);
5088 }
5089 
5090 // CHECK-LABEL: @test_vrshrq_n_u32(
5091 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5092 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5093 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5094 // CHECK:   ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_u32(uint32x4_t a)5095 uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
5096   return vrshrq_n_u32(a, 3);
5097 }
5098 
5099 // CHECK-LABEL: @test_vrshrq_n_u64(
5100 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5101 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5102 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5103 // CHECK:   ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_u64(uint64x2_t a)5104 uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
5105   return vrshrq_n_u64(a, 3);
5106 }
5107 
5108 // CHECK-LABEL: @test_vrsra_n_s8(
5109 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5110 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5111 // CHECK:   ret <8 x i8> [[TMP0]]
test_vrsra_n_s8(int8x8_t a,int8x8_t b)5112 int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
5113   return vrsra_n_s8(a, b, 3);
5114 }
5115 
5116 // CHECK-LABEL: @test_vrsra_n_s16(
5117 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5118 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5119 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5120 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5121 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5122 // CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5123 // CHECK:   ret <4 x i16> [[TMP3]]
test_vrsra_n_s16(int16x4_t a,int16x4_t b)5124 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
5125   return vrsra_n_s16(a, b, 3);
5126 }
5127 
5128 // CHECK-LABEL: @test_vrsra_n_s32(
5129 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5130 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5131 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5132 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5133 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5134 // CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5135 // CHECK:   ret <2 x i32> [[TMP3]]
test_vrsra_n_s32(int32x2_t a,int32x2_t b)5136 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
5137   return vrsra_n_s32(a, b, 3);
5138 }
5139 
5140 // CHECK-LABEL: @test_vrsraq_n_s8(
5141 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5142 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5143 // CHECK:   ret <16 x i8> [[TMP0]]
test_vrsraq_n_s8(int8x16_t a,int8x16_t b)5144 int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
5145   return vrsraq_n_s8(a, b, 3);
5146 }
5147 
5148 // CHECK-LABEL: @test_vrsraq_n_s16(
5149 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5150 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5151 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5152 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5153 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5154 // CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5155 // CHECK:   ret <8 x i16> [[TMP3]]
test_vrsraq_n_s16(int16x8_t a,int16x8_t b)5156 int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
5157   return vrsraq_n_s16(a, b, 3);
5158 }
5159 
5160 // CHECK-LABEL: @test_vrsraq_n_s32(
5161 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5162 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5163 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5165 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5166 // CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5167 // CHECK:   ret <4 x i32> [[TMP3]]
test_vrsraq_n_s32(int32x4_t a,int32x4_t b)5168 int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
5169   return vrsraq_n_s32(a, b, 3);
5170 }
5171 
5172 // CHECK-LABEL: @test_vrsraq_n_s64(
5173 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5174 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5175 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5176 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5177 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5178 // CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5179 // CHECK:   ret <2 x i64> [[TMP3]]
test_vrsraq_n_s64(int64x2_t a,int64x2_t b)5180 int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
5181   return vrsraq_n_s64(a, b, 3);
5182 }
5183 
5184 // CHECK-LABEL: @test_vrsra_n_u8(
5185 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5186 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5187 // CHECK:   ret <8 x i8> [[TMP0]]
test_vrsra_n_u8(uint8x8_t a,uint8x8_t b)5188 uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
5189   return vrsra_n_u8(a, b, 3);
5190 }
5191 
5192 // CHECK-LABEL: @test_vrsra_n_u16(
5193 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5194 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5195 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5196 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5197 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5198 // CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5199 // CHECK:   ret <4 x i16> [[TMP3]]
test_vrsra_n_u16(uint16x4_t a,uint16x4_t b)5200 uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
5201   return vrsra_n_u16(a, b, 3);
5202 }
5203 
5204 // CHECK-LABEL: @test_vrsra_n_u32(
5205 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5206 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5207 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5208 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5209 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5210 // CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5211 // CHECK:   ret <2 x i32> [[TMP3]]
test_vrsra_n_u32(uint32x2_t a,uint32x2_t b)5212 uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
5213   return vrsra_n_u32(a, b, 3);
5214 }
5215 
5216 // CHECK-LABEL: @test_vrsraq_n_u8(
5217 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5218 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5219 // CHECK:   ret <16 x i8> [[TMP0]]
test_vrsraq_n_u8(uint8x16_t a,uint8x16_t b)5220 uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
5221   return vrsraq_n_u8(a, b, 3);
5222 }
5223 
5224 // CHECK-LABEL: @test_vrsraq_n_u16(
5225 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5226 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5227 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5228 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5229 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5230 // CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5231 // CHECK:   ret <8 x i16> [[TMP3]]
test_vrsraq_n_u16(uint16x8_t a,uint16x8_t b)5232 uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
5233   return vrsraq_n_u16(a, b, 3);
5234 }
5235 
5236 // CHECK-LABEL: @test_vrsraq_n_u32(
5237 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5238 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5239 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5240 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5241 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5242 // CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5243 // CHECK:   ret <4 x i32> [[TMP3]]
test_vrsraq_n_u32(uint32x4_t a,uint32x4_t b)5244 uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
5245   return vrsraq_n_u32(a, b, 3);
5246 }
5247 
5248 // CHECK-LABEL: @test_vrsraq_n_u64(
5249 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5250 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5251 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5252 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5253 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5254 // CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5255 // CHECK:   ret <2 x i64> [[TMP3]]
test_vrsraq_n_u64(uint64x2_t a,uint64x2_t b)5256 uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
5257   return vrsraq_n_u64(a, b, 3);
5258 }
5259 
5260 // CHECK-LABEL: @test_vsri_n_s8(
5261 // CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5262 // CHECK:   ret <8 x i8> [[VSRI_N]]
test_vsri_n_s8(int8x8_t a,int8x8_t b)5263 int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
5264   return vsri_n_s8(a, b, 3);
5265 }
5266 
5267 // CHECK-LABEL: @test_vsri_n_s16(
5268 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5269 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5270 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5271 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5272 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5273 // CHECK:   ret <4 x i16> [[VSRI_N2]]
test_vsri_n_s16(int16x4_t a,int16x4_t b)5274 int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
5275   return vsri_n_s16(a, b, 3);
5276 }
5277 
5278 // CHECK-LABEL: @test_vsri_n_s32(
5279 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5280 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5281 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5282 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5283 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5284 // CHECK:   ret <2 x i32> [[VSRI_N2]]
test_vsri_n_s32(int32x2_t a,int32x2_t b)5285 int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
5286   return vsri_n_s32(a, b, 3);
5287 }
5288 
5289 // CHECK-LABEL: @test_vsriq_n_s8(
5290 // CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5291 // CHECK:   ret <16 x i8> [[VSRI_N]]
test_vsriq_n_s8(int8x16_t a,int8x16_t b)5292 int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
5293   return vsriq_n_s8(a, b, 3);
5294 }
5295 
5296 // CHECK-LABEL: @test_vsriq_n_s16(
5297 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5298 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5299 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5300 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5301 // CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5302 // CHECK:   ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_s16(int16x8_t a,int16x8_t b)5303 int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
5304   return vsriq_n_s16(a, b, 3);
5305 }
5306 
5307 // CHECK-LABEL: @test_vsriq_n_s32(
5308 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5309 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5310 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5311 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5312 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5313 // CHECK:   ret <4 x i32> [[VSRI_N2]]
test_vsriq_n_s32(int32x4_t a,int32x4_t b)5314 int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
5315   return vsriq_n_s32(a, b, 3);
5316 }
5317 
5318 // CHECK-LABEL: @test_vsriq_n_s64(
5319 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5320 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5321 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5322 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5323 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5324 // CHECK:   ret <2 x i64> [[VSRI_N2]]
test_vsriq_n_s64(int64x2_t a,int64x2_t b)5325 int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
5326   return vsriq_n_s64(a, b, 3);
5327 }
5328 
5329 // CHECK-LABEL: @test_vsri_n_u8(
5330 // CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5331 // CHECK:   ret <8 x i8> [[VSRI_N]]
test_vsri_n_u8(uint8x8_t a,uint8x8_t b)5332 uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
5333   return vsri_n_u8(a, b, 3);
5334 }
5335 
5336 // CHECK-LABEL: @test_vsri_n_u16(
5337 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5338 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5339 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5340 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5341 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5342 // CHECK:   ret <4 x i16> [[VSRI_N2]]
test_vsri_n_u16(uint16x4_t a,uint16x4_t b)5343 uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
5344   return vsri_n_u16(a, b, 3);
5345 }
5346 
5347 // CHECK-LABEL: @test_vsri_n_u32(
5348 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5349 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5350 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5351 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5352 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5353 // CHECK:   ret <2 x i32> [[VSRI_N2]]
test_vsri_n_u32(uint32x2_t a,uint32x2_t b)5354 uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
5355   return vsri_n_u32(a, b, 3);
5356 }
5357 
5358 // CHECK-LABEL: @test_vsriq_n_u8(
5359 // CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5360 // CHECK:   ret <16 x i8> [[VSRI_N]]
test_vsriq_n_u8(uint8x16_t a,uint8x16_t b)5361 uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
5362   return vsriq_n_u8(a, b, 3);
5363 }
5364 
5365 // CHECK-LABEL: @test_vsriq_n_u16(
5366 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5367 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5368 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5369 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5370 // CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5371 // CHECK:   ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_u16(uint16x8_t a,uint16x8_t b)5372 uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
5373   return vsriq_n_u16(a, b, 3);
5374 }
5375 
5376 // CHECK-LABEL: @test_vsriq_n_u32(
5377 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5378 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5379 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5380 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5381 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5382 // CHECK:   ret <4 x i32> [[VSRI_N2]]
test_vsriq_n_u32(uint32x4_t a,uint32x4_t b)5383 uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
5384   return vsriq_n_u32(a, b, 3);
5385 }
5386 
5387 // CHECK-LABEL: @test_vsriq_n_u64(
5388 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5389 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5390 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5391 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5392 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5393 // CHECK:   ret <2 x i64> [[VSRI_N2]]
test_vsriq_n_u64(uint64x2_t a,uint64x2_t b)5394 uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
5395   return vsriq_n_u64(a, b, 3);
5396 }
5397 
5398 // CHECK-LABEL: @test_vsri_n_p8(
5399 // CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5400 // CHECK:   ret <8 x i8> [[VSRI_N]]
test_vsri_n_p8(poly8x8_t a,poly8x8_t b)5401 poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
5402   return vsri_n_p8(a, b, 3);
5403 }
5404 
5405 // CHECK-LABEL: @test_vsri_n_p16(
5406 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5407 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5408 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5409 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5410 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
5411 // CHECK:   ret <4 x i16> [[VSRI_N2]]
test_vsri_n_p16(poly16x4_t a,poly16x4_t b)5412 poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
5413   return vsri_n_p16(a, b, 15);
5414 }
5415 
5416 // CHECK-LABEL: @test_vsriq_n_p8(
5417 // CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5418 // CHECK:   ret <16 x i8> [[VSRI_N]]
test_vsriq_n_p8(poly8x16_t a,poly8x16_t b)5419 poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
5420   return vsriq_n_p8(a, b, 3);
5421 }
5422 
5423 // CHECK-LABEL: @test_vsriq_n_p16(
5424 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5425 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5426 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5427 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5428 // CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
5429 // CHECK:   ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_p16(poly16x8_t a,poly16x8_t b)5430 poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
5431   return vsriq_n_p16(a, b, 15);
5432 }
5433 
5434 // CHECK-LABEL: @test_vsli_n_s8(
5435 // CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5436 // CHECK:   ret <8 x i8> [[VSLI_N]]
test_vsli_n_s8(int8x8_t a,int8x8_t b)5437 int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
5438   return vsli_n_s8(a, b, 3);
5439 }
5440 
5441 // CHECK-LABEL: @test_vsli_n_s16(
5442 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5444 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5445 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5446 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5447 // CHECK:   ret <4 x i16> [[VSLI_N2]]
test_vsli_n_s16(int16x4_t a,int16x4_t b)5448 int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
5449   return vsli_n_s16(a, b, 3);
5450 }
5451 
5452 // CHECK-LABEL: @test_vsli_n_s32(
5453 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5454 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5455 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5456 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5457 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5458 // CHECK:   ret <2 x i32> [[VSLI_N2]]
test_vsli_n_s32(int32x2_t a,int32x2_t b)5459 int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
5460   return vsli_n_s32(a, b, 3);
5461 }
5462 
5463 // CHECK-LABEL: @test_vsliq_n_s8(
5464 // CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5465 // CHECK:   ret <16 x i8> [[VSLI_N]]
test_vsliq_n_s8(int8x16_t a,int8x16_t b)5466 int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
5467   return vsliq_n_s8(a, b, 3);
5468 }
5469 
5470 // CHECK-LABEL: @test_vsliq_n_s16(
5471 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5472 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5473 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5474 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5475 // CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5476 // CHECK:   ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_s16(int16x8_t a,int16x8_t b)5477 int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
5478   return vsliq_n_s16(a, b, 3);
5479 }
5480 
5481 // CHECK-LABEL: @test_vsliq_n_s32(
5482 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5483 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5484 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5485 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5486 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5487 // CHECK:   ret <4 x i32> [[VSLI_N2]]
test_vsliq_n_s32(int32x4_t a,int32x4_t b)5488 int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
5489   return vsliq_n_s32(a, b, 3);
5490 }
5491 
5492 // CHECK-LABEL: @test_vsliq_n_s64(
5493 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5494 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5495 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5496 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5497 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5498 // CHECK:   ret <2 x i64> [[VSLI_N2]]
test_vsliq_n_s64(int64x2_t a,int64x2_t b)5499 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
5500   return vsliq_n_s64(a, b, 3);
5501 }
5502 
5503 // CHECK-LABEL: @test_vsli_n_u8(
5504 // CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5505 // CHECK:   ret <8 x i8> [[VSLI_N]]
test_vsli_n_u8(uint8x8_t a,uint8x8_t b)5506 uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
5507   return vsli_n_u8(a, b, 3);
5508 }
5509 
5510 // CHECK-LABEL: @test_vsli_n_u16(
5511 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5512 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5513 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5514 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5515 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5516 // CHECK:   ret <4 x i16> [[VSLI_N2]]
test_vsli_n_u16(uint16x4_t a,uint16x4_t b)5517 uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
5518   return vsli_n_u16(a, b, 3);
5519 }
5520 
5521 // CHECK-LABEL: @test_vsli_n_u32(
5522 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5523 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5524 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5525 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5526 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5527 // CHECK:   ret <2 x i32> [[VSLI_N2]]
test_vsli_n_u32(uint32x2_t a,uint32x2_t b)5528 uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
5529   return vsli_n_u32(a, b, 3);
5530 }
5531 
5532 // CHECK-LABEL: @test_vsliq_n_u8(
5533 // CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5534 // CHECK:   ret <16 x i8> [[VSLI_N]]
test_vsliq_n_u8(uint8x16_t a,uint8x16_t b)5535 uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
5536   return vsliq_n_u8(a, b, 3);
5537 }
5538 
5539 // CHECK-LABEL: @test_vsliq_n_u16(
5540 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5541 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5542 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5543 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5544 // CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5545 // CHECK:   ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_u16(uint16x8_t a,uint16x8_t b)5546 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
5547   return vsliq_n_u16(a, b, 3);
5548 }
5549 
5550 // CHECK-LABEL: @test_vsliq_n_u32(
5551 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5552 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5553 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5554 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5555 // CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5556 // CHECK:   ret <4 x i32> [[VSLI_N2]]
test_vsliq_n_u32(uint32x4_t a,uint32x4_t b)5557 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
5558   return vsliq_n_u32(a, b, 3);
5559 }
5560 
5561 // CHECK-LABEL: @test_vsliq_n_u64(
5562 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5563 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5564 // CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5565 // CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5566 // CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5567 // CHECK:   ret <2 x i64> [[VSLI_N2]]
test_vsliq_n_u64(uint64x2_t a,uint64x2_t b)5568 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
5569   return vsliq_n_u64(a, b, 3);
5570 }
5571 
// Section: vsli_n / vsliq_n (shift left and insert, by immediate) on the
// polynomial element types. Expected lowering is a call to the
// @llvm.aarch64.neon.vsli.* intrinsic; 16-bit element variants first round-trip
// the operands through <N x i8> bitcasts.
// CHECK-LABEL: @test_vsli_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}

// CHECK-LABEL: @test_vsliq_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
5607 
// Section: vqshlu_n / vqshluq_n (signed-to-unsigned saturating shift left by
// immediate). The scalar immediate is splatted into the second vector operand
// of @llvm.aarch64.neon.sqshlu.*.
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
// CHECK:   ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
// CHECK:   ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
// CHECK:   ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}
5666 
// Section: vshrn_n (shift right narrow by immediate). No target intrinsic is
// needed: signed variants lower to ashr + trunc, unsigned variants to
// lshr + trunc.
// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}
5726 
// Section: vshrn_high_n (shift right narrow into the high half). Same
// ashr/lshr + trunc lowering as vshrn_n, followed by a shufflevector that
// concatenates %a (low half) with the narrowed result (high half).
// CHECK-LABEL: @test_vshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
5792 
// Section: vqshrun_n / vqshrun_high_n (signed saturating shift right unsigned
// narrow by immediate) -> @llvm.aarch64.neon.sqshrun.*; the _high variants add
// the low/high-half concatenating shufflevector.
// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrun_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrun_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrun_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
5849 
// Section: vrshrn_n (rounding shift right narrow by immediate). Signed and
// unsigned variants both lower to the same @llvm.aarch64.neon.rshrn.*
// intrinsic (rounding narrow is sign-agnostic).
// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}
5903 
// Section: vrshrn_high_n (rounding shift right narrow into the high half):
// @llvm.aarch64.neon.rshrn.* followed by the concatenating shufflevector.
// CHECK-LABEL: @test_vrshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vrshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}
5963 
// Section: vqrshrun_n / vqrshrun_high_n (signed saturating rounded shift right
// unsigned narrow) -> @llvm.aarch64.neon.sqrshrun.*; _high variants append the
// concatenating shufflevector.
// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}
6020 
// Section: vqshrn_n (saturating shift right narrow by immediate). Signed
// variants lower to @llvm.aarch64.neon.sqshrn.*, unsigned variants to
// @llvm.aarch64.neon.uqshrn.*.
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}
6074 
// Section: vqshrn_high_n (saturating shift right narrow into the high half):
// sqshrn/uqshrn intrinsic followed by the concatenating shufflevector.
// CHECK-LABEL: @test_vqshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}
6134 
// Section: vqrshrn_n (saturating rounded shift right narrow by immediate).
// Signed variants lower to @llvm.aarch64.neon.sqrshrn.*, unsigned variants to
// @llvm.aarch64.neon.uqrshrn.*.
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}
6170 
6171 // CHECK-LABEL: @test_vqrshrn_n_u32(
6172 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6173 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6174 // CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6175 // CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
test_vqrshrn_n_u32(uint32x4_t a)6176 uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
6177   return vqrshrn_n_u32(a, 9);
6178 }
6179 
6180 // CHECK-LABEL: @test_vqrshrn_n_u64(
6181 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6182 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6183 // CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6184 // CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
test_vqrshrn_n_u64(uint64x2_t a)6185 uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
6186   return vqrshrn_n_u64(a, 19);
6187 }
6188 
// vqrshrn_high_n_*: rounding saturating shift-right-narrow of b, then a
// shufflevector concatenates the narrowed result into the high half of a.
6189 // CHECK-LABEL: @test_vqrshrn_high_n_s16(
6190 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6191 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6192 // CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6193 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6194 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vqrshrn_high_n_s16(int8x8_t a,int16x8_t b)6195 int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
6196   return vqrshrn_high_n_s16(a, b, 3);
6197 }
6198 
6199 // CHECK-LABEL: @test_vqrshrn_high_n_s32(
6200 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6201 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6202 // CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6203 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6204 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vqrshrn_high_n_s32(int16x4_t a,int32x4_t b)6205 int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
6206   return vqrshrn_high_n_s32(a, b, 9);
6207 }
6208 
6209 // CHECK-LABEL: @test_vqrshrn_high_n_s64(
6210 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6211 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6212 // CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6213 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6214 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vqrshrn_high_n_s64(int32x2_t a,int64x2_t b)6215 int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
6216   return vqrshrn_high_n_s64(a, b, 19);
6217 }
6218 
6219 // CHECK-LABEL: @test_vqrshrn_high_n_u16(
6220 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6221 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6222 // CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6223 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6224 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vqrshrn_high_n_u16(uint8x8_t a,uint16x8_t b)6225 uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
6226   return vqrshrn_high_n_u16(a, b, 3);
6227 }
6228 
6229 // CHECK-LABEL: @test_vqrshrn_high_n_u32(
6230 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6231 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6232 // CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6233 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6234 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vqrshrn_high_n_u32(uint16x4_t a,uint32x4_t b)6235 uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
6236   return vqrshrn_high_n_u32(a, b, 9);
6237 }
6238 
6239 // CHECK-LABEL: @test_vqrshrn_high_n_u64(
6240 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6241 // CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6242 // CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6243 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6244 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vqrshrn_high_n_u64(uint32x2_t a,uint64x2_t b)6245 uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
6246   return vqrshrn_high_n_u64(a, b, 19);
6247 }
6248 
// vshll_n_*: shift-left-long; lowers to a plain sext/zext followed by a
// vector shl by a splat constant (no intrinsic call), per the CHECK lines.
6249 // CHECK-LABEL: @test_vshll_n_s8(
6250 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
6251 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6252 // CHECK:   ret <8 x i16> [[VSHLL_N]]
test_vshll_n_s8(int8x8_t a)6253 int16x8_t test_vshll_n_s8(int8x8_t a) {
6254   return vshll_n_s8(a, 3);
6255 }
6256 
6257 // CHECK-LABEL: @test_vshll_n_s16(
6258 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6259 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6260 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6261 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6262 // CHECK:   ret <4 x i32> [[VSHLL_N]]
test_vshll_n_s16(int16x4_t a)6263 int32x4_t test_vshll_n_s16(int16x4_t a) {
6264   return vshll_n_s16(a, 9);
6265 }
6266 
6267 // CHECK-LABEL: @test_vshll_n_s32(
6268 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6269 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6270 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6271 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6272 // CHECK:   ret <2 x i64> [[VSHLL_N]]
test_vshll_n_s32(int32x2_t a)6273 int64x2_t test_vshll_n_s32(int32x2_t a) {
6274   return vshll_n_s32(a, 19);
6275 }
6276 
6277 // CHECK-LABEL: @test_vshll_n_u8(
6278 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
6279 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6280 // CHECK:   ret <8 x i16> [[VSHLL_N]]
test_vshll_n_u8(uint8x8_t a)6281 uint16x8_t test_vshll_n_u8(uint8x8_t a) {
6282   return vshll_n_u8(a, 3);
6283 }
6284 
6285 // CHECK-LABEL: @test_vshll_n_u16(
6286 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6287 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6288 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6289 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6290 // CHECK:   ret <4 x i32> [[VSHLL_N]]
test_vshll_n_u16(uint16x4_t a)6291 uint32x4_t test_vshll_n_u16(uint16x4_t a) {
6292   return vshll_n_u16(a, 9);
6293 }
6294 
6295 // CHECK-LABEL: @test_vshll_n_u32(
6296 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6297 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6298 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6299 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6300 // CHECK:   ret <2 x i64> [[VSHLL_N]]
test_vshll_n_u32(uint32x2_t a)6301 uint64x2_t test_vshll_n_u32(uint32x2_t a) {
6302   return vshll_n_u32(a, 19);
6303 }
6304 
// vshll_high_n_*: same as vshll_n_* but operating on the high half of a
// 128-bit input; a shufflevector extracts the upper lanes before extend+shl.
6305 // CHECK-LABEL: @test_vshll_high_n_s8(
6306 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6307 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6308 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6309 // CHECK:   ret <8 x i16> [[VSHLL_N]]
test_vshll_high_n_s8(int8x16_t a)6310 int16x8_t test_vshll_high_n_s8(int8x16_t a) {
6311   return vshll_high_n_s8(a, 3);
6312 }
6313 
6314 // CHECK-LABEL: @test_vshll_high_n_s16(
6315 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6316 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6317 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6318 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6319 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6320 // CHECK:   ret <4 x i32> [[VSHLL_N]]
test_vshll_high_n_s16(int16x8_t a)6321 int32x4_t test_vshll_high_n_s16(int16x8_t a) {
6322   return vshll_high_n_s16(a, 9);
6323 }
6324 
6325 // CHECK-LABEL: @test_vshll_high_n_s32(
6326 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6327 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6328 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6329 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6330 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6331 // CHECK:   ret <2 x i64> [[VSHLL_N]]
test_vshll_high_n_s32(int32x4_t a)6332 int64x2_t test_vshll_high_n_s32(int32x4_t a) {
6333   return vshll_high_n_s32(a, 19);
6334 }
6335 
6336 // CHECK-LABEL: @test_vshll_high_n_u8(
6337 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6338 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6339 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6340 // CHECK:   ret <8 x i16> [[VSHLL_N]]
test_vshll_high_n_u8(uint8x16_t a)6341 uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
6342   return vshll_high_n_u8(a, 3);
6343 }
6344 
6345 // CHECK-LABEL: @test_vshll_high_n_u16(
6346 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6347 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6348 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6349 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6350 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6351 // CHECK:   ret <4 x i32> [[VSHLL_N]]
test_vshll_high_n_u16(uint16x8_t a)6352 uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
6353   return vshll_high_n_u16(a, 9);
6354 }
6355 
6356 // CHECK-LABEL: @test_vshll_high_n_u32(
6357 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6358 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6359 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6360 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6361 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6362 // CHECK:   ret <2 x i64> [[VSHLL_N]]
test_vshll_high_n_u32(uint32x4_t a)6363 uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
6364   return vshll_high_n_u32(a, 19);
6365 }
6366 
// vmovl_*: lengthening move; lowers to a bare sext (signed) or zext (unsigned)
// of the input vector, per the CHECK lines.
6367 // CHECK-LABEL: @test_vmovl_s8(
6368 // CHECK:   [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6369 // CHECK:   ret <8 x i16> [[VMOVL_I]]
test_vmovl_s8(int8x8_t a)6370 int16x8_t test_vmovl_s8(int8x8_t a) {
6371   return vmovl_s8(a);
6372 }
6373 
6374 // CHECK-LABEL: @test_vmovl_s16(
6375 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6376 // CHECK:   [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6377 // CHECK:   ret <4 x i32> [[VMOVL_I]]
test_vmovl_s16(int16x4_t a)6378 int32x4_t test_vmovl_s16(int16x4_t a) {
6379   return vmovl_s16(a);
6380 }
6381 
6382 // CHECK-LABEL: @test_vmovl_s32(
6383 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6384 // CHECK:   [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6385 // CHECK:   ret <2 x i64> [[VMOVL_I]]
test_vmovl_s32(int32x2_t a)6386 int64x2_t test_vmovl_s32(int32x2_t a) {
6387   return vmovl_s32(a);
6388 }
6389 
6390 // CHECK-LABEL: @test_vmovl_u8(
6391 // CHECK:   [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6392 // CHECK:   ret <8 x i16> [[VMOVL_I]]
test_vmovl_u8(uint8x8_t a)6393 uint16x8_t test_vmovl_u8(uint8x8_t a) {
6394   return vmovl_u8(a);
6395 }
6396 
6397 // CHECK-LABEL: @test_vmovl_u16(
6398 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6399 // CHECK:   [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6400 // CHECK:   ret <4 x i32> [[VMOVL_I]]
test_vmovl_u16(uint16x4_t a)6401 uint32x4_t test_vmovl_u16(uint16x4_t a) {
6402   return vmovl_u16(a);
6403 }
6404 
6405 // CHECK-LABEL: @test_vmovl_u32(
6406 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6407 // CHECK:   [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6408 // CHECK:   ret <2 x i64> [[VMOVL_I]]
test_vmovl_u32(uint32x2_t a)6409 uint64x2_t test_vmovl_u32(uint32x2_t a) {
6410   return vmovl_u32(a);
6411 }
6412 
// vmovl_high_*: lengthening move of the upper half; shufflevector extracts the
// high lanes, then a sext/zext widens them.
6413 // CHECK-LABEL: @test_vmovl_high_s8(
6414 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6415 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
6416 // CHECK:   ret <8 x i16> [[TMP0]]
test_vmovl_high_s8(int8x16_t a)6417 int16x8_t test_vmovl_high_s8(int8x16_t a) {
6418   return vmovl_high_s8(a);
6419 }
6420 
6421 // CHECK-LABEL: @test_vmovl_high_s16(
6422 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6423 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
6424 // CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
6425 // CHECK:   ret <4 x i32> [[TMP1]]
test_vmovl_high_s16(int16x8_t a)6426 int32x4_t test_vmovl_high_s16(int16x8_t a) {
6427   return vmovl_high_s16(a);
6428 }
6429 
6430 // CHECK-LABEL: @test_vmovl_high_s32(
6431 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6432 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
6433 // CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
6434 // CHECK:   ret <2 x i64> [[TMP1]]
test_vmovl_high_s32(int32x4_t a)6435 int64x2_t test_vmovl_high_s32(int32x4_t a) {
6436   return vmovl_high_s32(a);
6437 }
6438 
6439 // CHECK-LABEL: @test_vmovl_high_u8(
6440 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6441 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
6442 // CHECK:   ret <8 x i16> [[TMP0]]
test_vmovl_high_u8(uint8x16_t a)6443 uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
6444   return vmovl_high_u8(a);
6445 }
6446 
6447 // CHECK-LABEL: @test_vmovl_high_u16(
6448 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6449 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
6450 // CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
6451 // CHECK:   ret <4 x i32> [[TMP1]]
test_vmovl_high_u16(uint16x8_t a)6452 uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
6453   return vmovl_high_u16(a);
6454 }
6455 
6456 // CHECK-LABEL: @test_vmovl_high_u32(
6457 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6458 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
6459 // CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
6460 // CHECK:   ret <2 x i64> [[TMP1]]
test_vmovl_high_u32(uint32x4_t a)6461 uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
6462   return vmovl_high_u32(a);
6463 }
6464 
// vcvt(q)_n_f*_[su]*: fixed-point to floating-point conversion with the given
// fractional-bit count; lowers to llvm.aarch64.neon.vcvtfxs2fp / vcvtfxu2fp.
6465 // CHECK-LABEL: @test_vcvt_n_f32_s32(
6466 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6467 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6468 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6469 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_s32(int32x2_t a)6470 float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
6471   return vcvt_n_f32_s32(a, 31);
6472 }
6473 
6474 // CHECK-LABEL: @test_vcvtq_n_f32_s32(
6475 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6476 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6477 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6478 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_s32(int32x4_t a)6479 float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
6480   return vcvtq_n_f32_s32(a, 31);
6481 }
6482 
6483 // CHECK-LABEL: @test_vcvtq_n_f64_s64(
6484 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6485 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6486 // CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6487 // CHECK:   ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_s64(int64x2_t a)6488 float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
6489   return vcvtq_n_f64_s64(a, 50);
6490 }
6491 
6492 // CHECK-LABEL: @test_vcvt_n_f32_u32(
6493 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6494 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6495 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6496 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_u32(uint32x2_t a)6497 float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
6498   return vcvt_n_f32_u32(a, 31);
6499 }
6500 
6501 // CHECK-LABEL: @test_vcvtq_n_f32_u32(
6502 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6503 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6504 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6505 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_u32(uint32x4_t a)6506 float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
6507   return vcvtq_n_f32_u32(a, 31);
6508 }
6509 
6510 // CHECK-LABEL: @test_vcvtq_n_f64_u64(
6511 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6512 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6513 // CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6514 // CHECK:   ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_u64(uint64x2_t a)6515 float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
6516   return vcvtq_n_f64_u64(a, 50);
6517 }
6518 
// vcvt(q)_n_[su]*_f*: floating-point to fixed-point conversion with the given
// fractional-bit count; lowers to llvm.aarch64.neon.vcvtfp2fxs / vcvtfp2fxu.
6519 // CHECK-LABEL: @test_vcvt_n_s32_f32(
6520 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6521 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
6522 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
6523 // CHECK:   ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_s32_f32(float32x2_t a)6524 int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
6525   return vcvt_n_s32_f32(a, 31);
6526 }
6527 
6528 // CHECK-LABEL: @test_vcvtq_n_s32_f32(
6529 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6530 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
6531 // CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
6532 // CHECK:   ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_s32_f32(float32x4_t a)6533 int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
6534   return vcvtq_n_s32_f32(a, 31);
6535 }
6536 
6537 // CHECK-LABEL: @test_vcvtq_n_s64_f64(
6538 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
6539 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
6540 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
6541 // CHECK:   ret <2 x i64> [[VCVT_N1]]
test_vcvtq_n_s64_f64(float64x2_t a)6542 int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
6543   return vcvtq_n_s64_f64(a, 50);
6544 }
6545 
6546 // CHECK-LABEL: @test_vcvt_n_u32_f32(
6547 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6548 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
6549 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
6550 // CHECK:   ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_u32_f32(float32x2_t a)6551 uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
6552   return vcvt_n_u32_f32(a, 31);
6553 }
6554 
6555 // CHECK-LABEL: @test_vcvtq_n_u32_f32(
6556 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6557 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
6558 // CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
6559 // CHECK:   ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_u32_f32(float32x4_t a)6560 uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
6561   return vcvtq_n_u32_f32(a, 31);
6562 }
6563 
6564 // CHECK-LABEL: @test_vcvtq_n_u64_f64(
6565 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
6566 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
6567 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
6568 // CHECK:   ret <2 x i64> [[VCVT_N1]]
test_vcvtq_n_u64_f64(float64x2_t a)6569 uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
6570   return vcvtq_n_u64_f64(a, 50);
6571 }
6572 
// vaddl_*: long add; both operands are widened (sext/zext) and then added in
// the wider type — plain IR, no intrinsic call, per the CHECK lines.
6573 // CHECK-LABEL: @test_vaddl_s8(
6574 // CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6575 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6576 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6577 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_s8(int8x8_t a,int8x8_t b)6578 int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
6579   return vaddl_s8(a, b);
6580 }
6581 
6582 // CHECK-LABEL: @test_vaddl_s16(
6583 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6584 // CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6585 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6586 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6587 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6588 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_s16(int16x4_t a,int16x4_t b)6589 int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
6590   return vaddl_s16(a, b);
6591 }
6592 
6593 // CHECK-LABEL: @test_vaddl_s32(
6594 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6595 // CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6596 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6597 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6598 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6599 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_s32(int32x2_t a,int32x2_t b)6600 int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
6601   return vaddl_s32(a, b);
6602 }
6603 
6604 // CHECK-LABEL: @test_vaddl_u8(
6605 // CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6606 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6607 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6608 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_u8(uint8x8_t a,uint8x8_t b)6609 uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
6610   return vaddl_u8(a, b);
6611 }
6612 
6613 // CHECK-LABEL: @test_vaddl_u16(
6614 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6615 // CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6616 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6617 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6618 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6619 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_u16(uint16x4_t a,uint16x4_t b)6620 uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
6621   return vaddl_u16(a, b);
6622 }
6623 
6624 // CHECK-LABEL: @test_vaddl_u32(
6625 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6626 // CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6627 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6628 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6629 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6630 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_u32(uint32x2_t a,uint32x2_t b)6631 uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
6632   return vaddl_u32(a, b);
6633 }
6634 
// vaddl_high_*: long add of the high halves; each operand's upper lanes are
// extracted by shufflevector, widened, then added in the wider type.
6635 // CHECK-LABEL: @test_vaddl_high_s8(
6636 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6637 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6638 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6639 // CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6640 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
6641 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_high_s8(int8x16_t a,int8x16_t b)6642 int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
6643   return vaddl_high_s8(a, b);
6644 }
6645 
6646 // CHECK-LABEL: @test_vaddl_high_s16(
6647 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6648 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6649 // CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6650 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6651 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6652 // CHECK:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6653 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
6654 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_high_s16(int16x8_t a,int16x8_t b)6655 int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
6656   return vaddl_high_s16(a, b);
6657 }
6658 
6659 // CHECK-LABEL: @test_vaddl_high_s32(
6660 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6661 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6662 // CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6663 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6664 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6665 // CHECK:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6666 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
6667 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_high_s32(int32x4_t a,int32x4_t b)6668 int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
6669   return vaddl_high_s32(a, b);
6670 }
6671 
6672 // CHECK-LABEL: @test_vaddl_high_u8(
6673 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6674 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6675 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6676 // CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6677 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
6678 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddl_high_u8(uint8x16_t a,uint8x16_t b)6679 uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
6680   return vaddl_high_u8(a, b);
6681 }
6682 
6683 // CHECK-LABEL: @test_vaddl_high_u16(
6684 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6685 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6686 // CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6687 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6688 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6689 // CHECK:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6690 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
6691 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddl_high_u16(uint16x8_t a,uint16x8_t b)6692 uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
6693   return vaddl_high_u16(a, b);
6694 }
6695 
6696 // CHECK-LABEL: @test_vaddl_high_u32(
6697 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6698 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6699 // CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6700 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6701 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6702 // CHECK:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6703 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
6704 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddl_high_u32(uint32x4_t a,uint32x4_t b)6705 uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
6706   return vaddl_high_u32(a, b);
6707 }
6708 
// vaddw_*: wide add; only the second (narrow) operand is widened, then added
// to the already-wide first operand. (vaddw_u32 continues past this chunk.)
6709 // CHECK-LABEL: @test_vaddw_s8(
6710 // CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6711 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
6712 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_s8(int16x8_t a,int8x8_t b)6713 int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
6714   return vaddw_s8(a, b);
6715 }
6716 
6717 // CHECK-LABEL: @test_vaddw_s16(
6718 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6719 // CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6720 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
6721 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_s16(int32x4_t a,int16x4_t b)6722 int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
6723   return vaddw_s16(a, b);
6724 }
6725 
6726 // CHECK-LABEL: @test_vaddw_s32(
6727 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6728 // CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6729 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
6730 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_s32(int64x2_t a,int32x2_t b)6731 int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
6732   return vaddw_s32(a, b);
6733 }
6734 
6735 // CHECK-LABEL: @test_vaddw_u8(
6736 // CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6737 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
6738 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_u8(uint16x8_t a,uint8x8_t b)6739 uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
6740   return vaddw_u8(a, b);
6741 }
6742 
6743 // CHECK-LABEL: @test_vaddw_u16(
6744 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6745 // CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6746 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
6747 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_u16(uint32x4_t a,uint16x4_t b)6748 uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
6749   return vaddw_u16(a, b);
6750 }
6751 
6752 // CHECK-LABEL: @test_vaddw_u32(
6753 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6754 // CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6755 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
6756 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_u32(uint64x2_t a,uint32x2_t b)6757 uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
6758   return vaddw_u32(a, b);
6759 }
6760 
6761 // CHECK-LABEL: @test_vaddw_high_s8(
6762 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6763 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6764 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
6765 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_high_s8(int16x8_t a,int8x16_t b)6766 int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
6767   return vaddw_high_s8(a, b);
6768 }
6769 
6770 // CHECK-LABEL: @test_vaddw_high_s16(
6771 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6772 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6773 // CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6774 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
6775 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_high_s16(int32x4_t a,int16x8_t b)6776 int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
6777   return vaddw_high_s16(a, b);
6778 }
6779 
6780 // CHECK-LABEL: @test_vaddw_high_s32(
6781 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6782 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6783 // CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6784 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
6785 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_high_s32(int64x2_t a,int32x4_t b)6786 int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
6787   return vaddw_high_s32(a, b);
6788 }
6789 
6790 // CHECK-LABEL: @test_vaddw_high_u8(
6791 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6792 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6793 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
6794 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vaddw_high_u8(uint16x8_t a,uint8x16_t b)6795 uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
6796   return vaddw_high_u8(a, b);
6797 }
6798 
6799 // CHECK-LABEL: @test_vaddw_high_u16(
6800 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6801 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6802 // CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6803 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
6804 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vaddw_high_u16(uint32x4_t a,uint16x8_t b)6805 uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
6806   return vaddw_high_u16(a, b);
6807 }
6808 
6809 // CHECK-LABEL: @test_vaddw_high_u32(
6810 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6811 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6812 // CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6813 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
6814 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vaddw_high_u32(uint64x2_t a,uint32x4_t b)6815 uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
6816   return vaddw_high_u32(a, b);
6817 }
6818 
6819 // CHECK-LABEL: @test_vsubl_s8(
6820 // CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6821 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6822 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6823 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubl_s8(int8x8_t a,int8x8_t b)6824 int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
6825   return vsubl_s8(a, b);
6826 }
6827 
6828 // CHECK-LABEL: @test_vsubl_s16(
6829 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6830 // CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6831 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6832 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6833 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6834 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubl_s16(int16x4_t a,int16x4_t b)6835 int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
6836   return vsubl_s16(a, b);
6837 }
6838 
6839 // CHECK-LABEL: @test_vsubl_s32(
6840 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6841 // CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6842 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6843 // CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6844 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6845 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubl_s32(int32x2_t a,int32x2_t b)6846 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
6847   return vsubl_s32(a, b);
6848 }
6849 
6850 // CHECK-LABEL: @test_vsubl_u8(
6851 // CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6852 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6853 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6854 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubl_u8(uint8x8_t a,uint8x8_t b)6855 uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
6856   return vsubl_u8(a, b);
6857 }
6858 
6859 // CHECK-LABEL: @test_vsubl_u16(
6860 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6861 // CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6862 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6863 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6864 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6865 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubl_u16(uint16x4_t a,uint16x4_t b)6866 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
6867   return vsubl_u16(a, b);
6868 }
6869 
6870 // CHECK-LABEL: @test_vsubl_u32(
6871 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6872 // CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6873 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6874 // CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6875 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6876 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubl_u32(uint32x2_t a,uint32x2_t b)6877 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
6878   return vsubl_u32(a, b);
6879 }
6880 
6881 // CHECK-LABEL: @test_vsubl_high_s8(
6882 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6883 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6884 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6885 // CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6886 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
6887 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubl_high_s8(int8x16_t a,int8x16_t b)6888 int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
6889   return vsubl_high_s8(a, b);
6890 }
6891 
6892 // CHECK-LABEL: @test_vsubl_high_s16(
6893 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6894 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6895 // CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6896 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6897 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6898 // CHECK:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6899 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
6900 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubl_high_s16(int16x8_t a,int16x8_t b)6901 int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
6902   return vsubl_high_s16(a, b);
6903 }
6904 
6905 // CHECK-LABEL: @test_vsubl_high_s32(
6906 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6907 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6908 // CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6909 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6910 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6911 // CHECK:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6912 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
6913 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubl_high_s32(int32x4_t a,int32x4_t b)6914 int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
6915   return vsubl_high_s32(a, b);
6916 }
6917 
6918 // CHECK-LABEL: @test_vsubl_high_u8(
6919 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6920 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6921 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6922 // CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6923 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
6924 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubl_high_u8(uint8x16_t a,uint8x16_t b)6925 uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
6926   return vsubl_high_u8(a, b);
6927 }
6928 
6929 // CHECK-LABEL: @test_vsubl_high_u16(
6930 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6931 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6932 // CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6933 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6934 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6935 // CHECK:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6936 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
6937 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubl_high_u16(uint16x8_t a,uint16x8_t b)6938 uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
6939   return vsubl_high_u16(a, b);
6940 }
6941 
6942 // CHECK-LABEL: @test_vsubl_high_u32(
6943 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6944 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6945 // CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6946 // CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6947 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6948 // CHECK:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6949 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
6950 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubl_high_u32(uint32x4_t a,uint32x4_t b)6951 uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
6952   return vsubl_high_u32(a, b);
6953 }
6954 
6955 // CHECK-LABEL: @test_vsubw_s8(
6956 // CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6957 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
6958 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubw_s8(int16x8_t a,int8x8_t b)6959 int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
6960   return vsubw_s8(a, b);
6961 }
6962 
6963 // CHECK-LABEL: @test_vsubw_s16(
6964 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6965 // CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6966 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
6967 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubw_s16(int32x4_t a,int16x4_t b)6968 int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
6969   return vsubw_s16(a, b);
6970 }
6971 
6972 // CHECK-LABEL: @test_vsubw_s32(
6973 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6974 // CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6975 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
6976 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubw_s32(int64x2_t a,int32x2_t b)6977 int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
6978   return vsubw_s32(a, b);
6979 }
6980 
6981 // CHECK-LABEL: @test_vsubw_u8(
6982 // CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6983 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
6984 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubw_u8(uint16x8_t a,uint8x8_t b)6985 uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
6986   return vsubw_u8(a, b);
6987 }
6988 
6989 // CHECK-LABEL: @test_vsubw_u16(
6990 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6991 // CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6992 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
6993 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubw_u16(uint32x4_t a,uint16x4_t b)6994 uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
6995   return vsubw_u16(a, b);
6996 }
6997 
6998 // CHECK-LABEL: @test_vsubw_u32(
6999 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7000 // CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
7001 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
7002 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubw_u32(uint64x2_t a,uint32x2_t b)7003 uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
7004   return vsubw_u32(a, b);
7005 }
7006 
7007 // CHECK-LABEL: @test_vsubw_high_s8(
7008 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7009 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7010 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7011 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubw_high_s8(int16x8_t a,int8x16_t b)7012 int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
7013   return vsubw_high_s8(a, b);
7014 }
7015 
7016 // CHECK-LABEL: @test_vsubw_high_s16(
7017 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7018 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7019 // CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
7020 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
7021 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubw_high_s16(int32x4_t a,int16x8_t b)7022 int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
7023   return vsubw_high_s16(a, b);
7024 }
7025 
7026 // CHECK-LABEL: @test_vsubw_high_s32(
7027 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7028 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7029 // CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
7030 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
7031 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubw_high_s32(int64x2_t a,int32x4_t b)7032 int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
7033   return vsubw_high_s32(a, b);
7034 }
7035 
7036 // CHECK-LABEL: @test_vsubw_high_u8(
7037 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7038 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7039 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7040 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vsubw_high_u8(uint16x8_t a,uint8x16_t b)7041 uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
7042   return vsubw_high_u8(a, b);
7043 }
7044 
7045 // CHECK-LABEL: @test_vsubw_high_u16(
7046 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7047 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7048 // CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
7049 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
7050 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vsubw_high_u16(uint32x4_t a,uint16x8_t b)7051 uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
7052   return vsubw_high_u16(a, b);
7053 }
7054 
7055 // CHECK-LABEL: @test_vsubw_high_u32(
7056 // CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7057 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7058 // CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
7059 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
7060 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vsubw_high_u32(uint64x2_t a,uint32x4_t b)7061 uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
7062   return vsubw_high_u32(a, b);
7063 }
7064 
7065 // CHECK-LABEL: @test_vaddhn_s16(
7066 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7067 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7068 // CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
7069 // CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7070 // CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7071 // CHECK:   ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_s16(int16x8_t a,int16x8_t b)7072 int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
7073   return vaddhn_s16(a, b);
7074 }
7075 
7076 // CHECK-LABEL: @test_vaddhn_s32(
7077 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7078 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7079 // CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
7080 // CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7081 // CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7082 // CHECK:   ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_s32(int32x4_t a,int32x4_t b)7083 int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
7084   return vaddhn_s32(a, b);
7085 }
7086 
7087 // CHECK-LABEL: @test_vaddhn_s64(
7088 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7089 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7090 // CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
7091 // CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7092 // CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7093 // CHECK:   ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_s64(int64x2_t a,int64x2_t b)7094 int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
7095   return vaddhn_s64(a, b);
7096 }
7097 
7098 // CHECK-LABEL: @test_vaddhn_u16(
7099 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7100 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7101 // CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
7102 // CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7103 // CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7104 // CHECK:   ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_u16(uint16x8_t a,uint16x8_t b)7105 uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
7106   return vaddhn_u16(a, b);
7107 }
7108 
7109 // CHECK-LABEL: @test_vaddhn_u32(
7110 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7111 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7112 // CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
7113 // CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7114 // CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7115 // CHECK:   ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_u32(uint32x4_t a,uint32x4_t b)7116 uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
7117   return vaddhn_u32(a, b);
7118 }
7119 
7120 // CHECK-LABEL: @test_vaddhn_u64(
7121 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7122 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7123 // CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
7124 // CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7125 // CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7126 // CHECK:   ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_u64(uint64x2_t a,uint64x2_t b)7127 uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
7128   return vaddhn_u64(a, b);
7129 }
7130 
7131 // CHECK-LABEL: @test_vaddhn_high_s16(
7132 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7133 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7134 // CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
7135 // CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7136 // CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7137 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7138 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
test_vaddhn_high_s16(int8x8_t r,int16x8_t a,int16x8_t b)7139 int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
7140   return vaddhn_high_s16(r, a, b);
7141 }
7142 
7143 // CHECK-LABEL: @test_vaddhn_high_s32(
7144 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7145 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7146 // CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
7147 // CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7148 // CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7149 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7150 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
test_vaddhn_high_s32(int16x4_t r,int32x4_t a,int32x4_t b)7151 int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
7152   return vaddhn_high_s32(r, a, b);
7153 }
7154 
7155 // CHECK-LABEL: @test_vaddhn_high_s64(
7156 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7157 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7158 // CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
7159 // CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7160 // CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7161 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7162 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
test_vaddhn_high_s64(int32x2_t r,int64x2_t a,int64x2_t b)7163 int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
7164   return vaddhn_high_s64(r, a, b);
7165 }
7166 
7167 // CHECK-LABEL: @test_vaddhn_high_u16(
7168 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7169 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7170 // CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
7171 // CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7172 // CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7173 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7174 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
test_vaddhn_high_u16(uint8x8_t r,uint16x8_t a,uint16x8_t b)7175 uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
7176   return vaddhn_high_u16(r, a, b);
7177 }
7178 
7179 // CHECK-LABEL: @test_vaddhn_high_u32(
7180 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7181 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7182 // CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
7183 // CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7184 // CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7185 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7186 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
test_vaddhn_high_u32(uint16x4_t r,uint32x4_t a,uint32x4_t b)7187 uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
7188   return vaddhn_high_u32(r, a, b);
7189 }
7190 
7191 // CHECK-LABEL: @test_vaddhn_high_u64(
7192 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7193 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7194 // CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
7195 // CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7196 // CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7197 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7198 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
test_vaddhn_high_u64(uint32x2_t r,uint64x2_t a,uint64x2_t b)7199 uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
7200   return vaddhn_high_u64(r, a, b);
7201 }
7202 
7203 // CHECK-LABEL: @test_vraddhn_s16(
7204 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7205 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7206 // CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7207 // CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
test_vraddhn_s16(int16x8_t a,int16x8_t b)7208 int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
7209   return vraddhn_s16(a, b);
7210 }
7211 
7212 // CHECK-LABEL: @test_vraddhn_s32(
7213 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7214 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7215 // CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7216 // CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7217 // CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
test_vraddhn_s32(int32x4_t a,int32x4_t b)7218 int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
7219   return vraddhn_s32(a, b);
7220 }
7221 
7222 // CHECK-LABEL: @test_vraddhn_s64(
7223 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7224 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7225 // CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7226 // CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
7227 // CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
test_vraddhn_s64(int64x2_t a,int64x2_t b)7228 int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
7229   return vraddhn_s64(a, b);
7230 }
7231 
7232 // CHECK-LABEL: @test_vraddhn_u16(
7233 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7234 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7235 // CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7236 // CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
test_vraddhn_u16(uint16x8_t a,uint16x8_t b)7237 uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
7238   return vraddhn_u16(a, b);
7239 }
7240 
7241 // CHECK-LABEL: @test_vraddhn_u32(
7242 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7243 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7244 // CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7245 // CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7246 // CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
test_vraddhn_u32(uint32x4_t a,uint32x4_t b)7247 uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
7248   return vraddhn_u32(a, b);
7249 }
7250 
7251 // CHECK-LABEL: @test_vraddhn_u64(
7252 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7253 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7254 // CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7255 // CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
7256 // CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
test_vraddhn_u64(uint64x2_t a,uint64x2_t b)7257 uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
7258   return vraddhn_u64(a, b);
7259 }
7260 
7261 // CHECK-LABEL: @test_vraddhn_high_s16(
7262 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7263 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7264 // CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7265 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7266 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
test_vraddhn_high_s16(int8x8_t r,int16x8_t a,int16x8_t b)7267 int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
7268   return vraddhn_high_s16(r, a, b);
7269 }
7270 
// High-half variant: raddhn narrows s32x4 -> s16x4, then concatenates onto %r.
// CHECK-LABEL: @test_vraddhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}
7281 
// High-half variant: raddhn narrows s64x2 -> s32x2, then concatenates onto %r.
// CHECK-LABEL: @test_vraddhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}
7292 
// Unsigned high-half variant; same raddhn.v8i8 lowering as the signed form.
// CHECK-LABEL: @test_vraddhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}
7302 
// Unsigned high-half variant; same raddhn.v4i16 lowering as the signed form.
// CHECK-LABEL: @test_vraddhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}
7313 
// Unsigned high-half variant; same raddhn.v2i32 lowering as the signed form.
// CHECK-LABEL: @test_vraddhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}
7324 
// Non-rounding subtract-narrow lowers to plain sub/lshr/trunc IR (no intrinsic call).
// CHECK-LABEL: @test_vsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}
7335 
// Subtract-narrow s32x4 -> s16x4: sub, shift right by half the element width, truncate.
// CHECK-LABEL: @test_vsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}
7346 
// Subtract-narrow s64x2 -> s32x2: sub, lshr 32, truncate.
// CHECK-LABEL: @test_vsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}
7357 
// Unsigned subtract-narrow; identical sub/lshr/trunc lowering to the signed form.
// CHECK-LABEL: @test_vsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}
7368 
// Unsigned subtract-narrow u32x4 -> u16x4 via sub/lshr/trunc.
// CHECK-LABEL: @test_vsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}
7379 
// Unsigned subtract-narrow u64x2 -> u32x2 via sub/lshr/trunc.
// CHECK-LABEL: @test_vsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}
7390 
// High-half variant: subtract-narrow result is concatenated onto %r via shufflevector.
// CHECK-LABEL: @test_vsubhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}
7402 
// High-half variant: sub/lshr/trunc then concatenate onto %r.
// CHECK-LABEL: @test_vsubhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}
7414 
// High-half variant: sub/lshr/trunc then concatenate onto %r.
// CHECK-LABEL: @test_vsubhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}
7426 
// Unsigned high-half variant; identical lowering to the signed form.
// CHECK-LABEL: @test_vsubhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}
7438 
// Unsigned high-half variant; identical lowering to the signed form.
// CHECK-LABEL: @test_vsubhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}
7450 
// Unsigned high-half variant; identical lowering to the signed form.
// CHECK-LABEL: @test_vsubhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}
7462 
// Rounding subtract-narrow; unlike vsubhn this expects the rsubhn intrinsic.
// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}
7471 
// Rounding subtract-narrow s32x4 -> s16x4 via the rsubhn intrinsic.
// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}
7481 
// Rounding subtract-narrow s64x2 -> s32x2 via the rsubhn intrinsic.
// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}
7491 
// Unsigned rounding subtract-narrow; same rsubhn.v8i8 lowering as the signed form.
// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}
7500 
// Unsigned rounding subtract-narrow; same rsubhn.v4i16 lowering as the signed form.
// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}
7510 
// Unsigned rounding subtract-narrow; same rsubhn.v2i32 lowering as the signed form.
// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
7520 
// High-half variant: rsubhn narrows, then the result is concatenated onto %r.
// CHECK-LABEL: @test_vrsubhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}
7530 
// High-half variant: rsubhn narrows s32x4 -> s16x4, then concatenates onto %r.
// CHECK-LABEL: @test_vrsubhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}
7541 
// High-half variant: rsubhn narrows s64x2 -> s32x2, then concatenates onto %r.
// CHECK-LABEL: @test_vrsubhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}
7552 
// Unsigned high-half variant; same rsubhn.v8i8 lowering as the signed form.
// CHECK-LABEL: @test_vrsubhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}
7562 
// Unsigned high-half variant; same rsubhn.v4i16 lowering as the signed form.
// CHECK-LABEL: @test_vrsubhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}
7573 
// Unsigned high-half variant; same rsubhn.v2i32 lowering as the signed form.
// CHECK-LABEL: @test_vrsubhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}
7584 
// Signed absolute-difference long: sabd then zext to the widened element type.
// CHECK-LABEL: @test_vabdl_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}
7592 
// Signed absolute-difference long: sabd.v4i16 then zext to <4 x i32>.
// CHECK-LABEL: @test_vabdl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}
7603 
// Signed absolute-difference long: sabd.v2i32 then zext to <2 x i64>.
// CHECK-LABEL: @test_vabdl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}
7614 
// Unsigned absolute-difference long: uabd then zext.
// CHECK-LABEL: @test_vabdl_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}
7622 
// Unsigned absolute-difference long: uabd.v4i16 then zext to <4 x i32>.
// CHECK-LABEL: @test_vabdl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}
7633 
// Unsigned absolute-difference long: uabd.v2i32 then zext to <2 x i64>.
// CHECK-LABEL: @test_vabdl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
7644 
// Absolute-difference accumulate long: sabd, zext, then add into the accumulator %a.
// CHECK-LABEL: @test_vabal_s8(
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}
7653 
// Absolute-difference accumulate long: sabd.v4i16, zext, add into %a.
// CHECK-LABEL: @test_vabal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}
7665 
// Absolute-difference accumulate long: sabd.v2i32, zext, add into %a.
// CHECK-LABEL: @test_vabal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}
7677 
// Unsigned absolute-difference accumulate long: uabd, zext, add into %a.
// CHECK-LABEL: @test_vabal_u8(
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}
7686 
// Unsigned absolute-difference accumulate long: uabd.v4i16, zext, add into %a.
// CHECK-LABEL: @test_vabal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}
7698 
// Unsigned absolute-difference accumulate long: uabd.v2i32, zext, add into %a.
// CHECK-LABEL: @test_vabal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
7710 
// High-half variant: shufflevector extracts the upper halves before sabd + zext.
// CHECK-LABEL: @test_vabdl_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
7720 
// High-half variant: extract upper halves, then sabd.v4i16 + zext to <4 x i32>.
// CHECK-LABEL: @test_vabdl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
7733 
// High-half variant: extract upper halves, then sabd.v2i32 + zext to <2 x i64>.
// CHECK-LABEL: @test_vabdl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
7746 
// Unsigned high-half variant: extract upper halves, then uabd + zext.
// CHECK-LABEL: @test_vabdl_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
7756 
// Unsigned high-half variant: extract upper halves, then uabd.v4i16 + zext.
// CHECK-LABEL: @test_vabdl_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
7769 
7770 // CHECK-LABEL: @test_vabdl_high_u32(
7771 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7772 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7773 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7774 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7775 // CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7776 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7777 // CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7778 // CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
// Pins vabdl_high_u32 lowering: high-half shuffles, @llvm.aarch64.neon.uabd.v2i32, zext to <2 x i64>.
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}
7782 
7783 // CHECK-LABEL: @test_vabal_high_s8(
7784 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7785 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7786 // CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7787 // CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7788 // CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7789 // CHECK:   ret <8 x i16> [[ADD_I_I]]
// Pins vabal_high_s8: @llvm.aarch64.neon.sabd.v8i8 on the high halves, zext, then add into %a.
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}
7793 
7794 // CHECK-LABEL: @test_vabal_high_s16(
7795 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7796 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7797 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7798 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7799 // CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7800 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7801 // CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7802 // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7803 // CHECK:   ret <4 x i32> [[ADD_I_I]]
// Pins vabal_high_s16: @llvm.aarch64.neon.sabd.v4i16 on the high halves, zext to <4 x i32>, add into %a.
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}
7807 
7808 // CHECK-LABEL: @test_vabal_high_s32(
7809 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7810 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7811 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7812 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7813 // CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7814 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7815 // CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7816 // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7817 // CHECK:   ret <2 x i64> [[ADD_I_I]]
// Pins vabal_high_s32: @llvm.aarch64.neon.sabd.v2i32 on the high halves, zext to <2 x i64>, add into %a.
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}
7821 
7822 // CHECK-LABEL: @test_vabal_high_u8(
7823 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7824 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7825 // CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7826 // CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7827 // CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7828 // CHECK:   ret <8 x i16> [[ADD_I_I]]
// Unsigned variant of the accumulate-long test: @llvm.aarch64.neon.uabd.v8i8, zext, add into %a.
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}
7832 
7833 // CHECK-LABEL: @test_vabal_high_u16(
7834 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7835 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7836 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7837 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7838 // CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7839 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7840 // CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7841 // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7842 // CHECK:   ret <4 x i32> [[ADD_I_I]]
// Unsigned variant: @llvm.aarch64.neon.uabd.v4i16 on high halves, zext to <4 x i32>, add into %a.
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}
7846 
7847 // CHECK-LABEL: @test_vabal_high_u32(
7848 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7849 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7850 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7851 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7852 // CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7853 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7854 // CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7855 // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7856 // CHECK:   ret <2 x i64> [[ADD_I_I]]
// Unsigned variant: @llvm.aarch64.neon.uabd.v2i32 on high halves, zext to <2 x i64>, add into %a.
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}
7860 
7861 // CHECK-LABEL: @test_vmull_s8(
7862 // CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
7863 // CHECK:   ret <8 x i16> [[VMULL_I]]
// Pins vmull_s8: single call to @llvm.aarch64.neon.smull.v8i16 on the full 64-bit operands.
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
7867 
7868 // CHECK-LABEL: @test_vmull_s16(
7869 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7870 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7871 // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
7872 // CHECK:   ret <4 x i32> [[VMULL2_I]]
// Pins vmull_s16: operand bitcasts to <8 x i8> plus @llvm.aarch64.neon.smull.v4i32.
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
7876 
7877 // CHECK-LABEL: @test_vmull_s32(
7878 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7879 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7880 // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
7881 // CHECK:   ret <2 x i64> [[VMULL2_I]]
// Pins vmull_s32: operand bitcasts plus @llvm.aarch64.neon.smull.v2i64.
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
7885 
7886 // CHECK-LABEL: @test_vmull_u8(
7887 // CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
7888 // CHECK:   ret <8 x i16> [[VMULL_I]]
// Unsigned widening multiply: single call to @llvm.aarch64.neon.umull.v8i16.
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
7892 
7893 // CHECK-LABEL: @test_vmull_u16(
7894 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7895 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7896 // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
7897 // CHECK:   ret <4 x i32> [[VMULL2_I]]
// Unsigned widening multiply: bitcasts plus @llvm.aarch64.neon.umull.v4i32.
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
7901 
7902 // CHECK-LABEL: @test_vmull_u32(
7903 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7904 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7905 // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
7906 // CHECK:   ret <2 x i64> [[VMULL2_I]]
// Unsigned widening multiply: bitcasts plus @llvm.aarch64.neon.umull.v2i64.
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}
7910 
7911 // CHECK-LABEL: @test_vmull_high_s8(
7912 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7913 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7914 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7915 // CHECK:   ret <8 x i16> [[VMULL_I_I]]
// High-half variant: lanes 8..15 shuffled out of both operands before @llvm.aarch64.neon.smull.v8i16.
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
7919 
7920 // CHECK-LABEL: @test_vmull_high_s16(
7921 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7922 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7923 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7924 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7925 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7926 // CHECK:   ret <4 x i32> [[VMULL2_I_I]]
// High-half variant: lanes 4..7 shuffled out before @llvm.aarch64.neon.smull.v4i32.
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
7930 
7931 // CHECK-LABEL: @test_vmull_high_s32(
7932 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7933 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7934 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7935 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7936 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7937 // CHECK:   ret <2 x i64> [[VMULL2_I_I]]
// High-half variant: lanes 2..3 shuffled out before @llvm.aarch64.neon.smull.v2i64.
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
7941 
7942 // CHECK-LABEL: @test_vmull_high_u8(
7943 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7944 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7945 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7946 // CHECK:   ret <8 x i16> [[VMULL_I_I]]
// Unsigned high-half variant: high-lane shuffles plus @llvm.aarch64.neon.umull.v8i16.
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
7950 
7951 // CHECK-LABEL: @test_vmull_high_u16(
7952 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7953 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7954 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7955 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7956 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7957 // CHECK:   ret <4 x i32> [[VMULL2_I_I]]
// Unsigned high-half variant: high-lane shuffles plus @llvm.aarch64.neon.umull.v4i32.
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
7961 
7962 // CHECK-LABEL: @test_vmull_high_u32(
7963 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7964 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7965 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7966 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7967 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7968 // CHECK:   ret <2 x i64> [[VMULL2_I_I]]
// Unsigned high-half variant: high-lane shuffles plus @llvm.aarch64.neon.umull.v2i64.
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}
7972 
7973 // CHECK-LABEL: @test_vmlal_s8(
7974 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
7975 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7976 // CHECK:   ret <8 x i16> [[ADD_I]]
// Multiply-accumulate long: @llvm.aarch64.neon.smull.v8i16(%b, %c) followed by add into %a.
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
7980 
7981 // CHECK-LABEL: @test_vmlal_s16(
7982 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7983 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7984 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
7985 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7986 // CHECK:   ret <4 x i32> [[ADD_I]]
// Multiply-accumulate long: smull.v4i32(%b, %c) plus add into %a.
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
7990 
7991 // CHECK-LABEL: @test_vmlal_s32(
7992 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7993 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7994 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
7995 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7996 // CHECK:   ret <2 x i64> [[ADD_I]]
// Multiply-accumulate long: smull.v2i64(%b, %c) plus add into %a.
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
8000 
8001 // CHECK-LABEL: @test_vmlal_u8(
8002 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8003 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8004 // CHECK:   ret <8 x i16> [[ADD_I]]
// Unsigned multiply-accumulate long: umull.v8i16(%b, %c) plus add into %a.
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
8008 
8009 // CHECK-LABEL: @test_vmlal_u16(
8010 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8011 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8012 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8013 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8014 // CHECK:   ret <4 x i32> [[ADD_I]]
// Unsigned multiply-accumulate long: umull.v4i32(%b, %c) plus add into %a.
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
8018 
8019 // CHECK-LABEL: @test_vmlal_u32(
8020 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8021 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8022 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8023 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8024 // CHECK:   ret <2 x i64> [[ADD_I]]
// Unsigned multiply-accumulate long: umull.v2i64(%b, %c) plus add into %a.
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
8028 
8029 // CHECK-LABEL: @test_vmlal_high_s8(
8030 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8031 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8032 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8033 // CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8034 // CHECK:   ret <8 x i16> [[ADD_I_I]]
// High-half multiply-accumulate long: shuffles, smull.v8i16, add into %a.
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
8038 
8039 // CHECK-LABEL: @test_vmlal_high_s16(
8040 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8041 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8042 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8043 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8044 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8045 // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8046 // CHECK:   ret <4 x i32> [[ADD_I_I]]
// High-half multiply-accumulate long: shuffles, smull.v4i32, add into %a.
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
8050 
8051 // CHECK-LABEL: @test_vmlal_high_s32(
8052 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8053 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8054 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8055 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8056 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8057 // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8058 // CHECK:   ret <2 x i64> [[ADD_I_I]]
// High-half multiply-accumulate long: shuffles, smull.v2i64, add into %a.
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
8062 
8063 // CHECK-LABEL: @test_vmlal_high_u8(
8064 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8065 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8066 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8067 // CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8068 // CHECK:   ret <8 x i16> [[ADD_I_I]]
// Unsigned high-half multiply-accumulate long: shuffles, umull.v8i16, add into %a.
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
8072 
8073 // CHECK-LABEL: @test_vmlal_high_u16(
8074 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8075 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8076 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8077 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8078 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8079 // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8080 // CHECK:   ret <4 x i32> [[ADD_I_I]]
// Unsigned high-half multiply-accumulate long: shuffles, umull.v4i32, add into %a.
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
8084 
8085 // CHECK-LABEL: @test_vmlal_high_u32(
8086 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8087 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8088 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8089 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8090 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8091 // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8092 // CHECK:   ret <2 x i64> [[ADD_I_I]]
// Unsigned high-half multiply-accumulate long: shuffles, umull.v2i64, add into %a.
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}
8096 
8097 // CHECK-LABEL: @test_vmlsl_s8(
8098 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
8099 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8100 // CHECK:   ret <8 x i16> [[SUB_I]]
// Multiply-subtract long: smull.v8i16(%b, %c) then sub from %a (mirror of the vmlal test, sub instead of add).
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
8104 
8105 // CHECK-LABEL: @test_vmlsl_s16(
8106 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8107 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8108 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
8109 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8110 // CHECK:   ret <4 x i32> [[SUB_I]]
// Multiply-subtract long: smull.v4i32(%b, %c) then sub from %a.
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
8114 
8115 // CHECK-LABEL: @test_vmlsl_s32(
8116 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8117 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8118 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
8119 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8120 // CHECK:   ret <2 x i64> [[SUB_I]]
// Multiply-subtract long: smull.v2i64(%b, %c) then sub from %a.
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
8124 
8125 // CHECK-LABEL: @test_vmlsl_u8(
8126 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8127 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8128 // CHECK:   ret <8 x i16> [[SUB_I]]
// Unsigned multiply-subtract long: umull.v8i16(%b, %c) then sub from %a.
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
8132 
8133 // CHECK-LABEL: @test_vmlsl_u16(
8134 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8135 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8136 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8137 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8138 // CHECK:   ret <4 x i32> [[SUB_I]]
// Unsigned multiply-subtract long: umull.v4i32(%b, %c) then sub from %a.
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
8142 
8143 // CHECK-LABEL: @test_vmlsl_u32(
8144 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8145 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8146 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8147 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8148 // CHECK:   ret <2 x i64> [[SUB_I]]
// Unsigned multiply-subtract long: umull.v2i64(%b, %c) then sub from %a.
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
8152 
8153 // CHECK-LABEL: @test_vmlsl_high_s8(
8154 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8155 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8156 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8157 // CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8158 // CHECK:   ret <8 x i16> [[SUB_I_I]]
// High-half multiply-subtract long: shuffles, smull.v8i16, sub from %a.
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
8162 
8163 // CHECK-LABEL: @test_vmlsl_high_s16(
8164 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8165 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8166 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8167 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8168 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8169 // CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8170 // CHECK:   ret <4 x i32> [[SUB_I_I]]
// High-half multiply-subtract long: shuffles, smull.v4i32, sub from %a.
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
8174 
8175 // CHECK-LABEL: @test_vmlsl_high_s32(
8176 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8177 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8178 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8179 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8180 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8181 // CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8182 // CHECK:   ret <2 x i64> [[SUB_I_I]]
// High-half multiply-subtract long: shuffles, smull.v2i64, sub from %a.
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
8186 
8187 // CHECK-LABEL: @test_vmlsl_high_u8(
8188 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8189 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8190 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8191 // CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8192 // CHECK:   ret <8 x i16> [[SUB_I_I]]
// Unsigned high-half multiply-subtract long: shuffles, umull.v8i16, sub from %a.
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
8196 
8197 // CHECK-LABEL: @test_vmlsl_high_u16(
8198 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8199 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8200 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8201 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8202 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8203 // CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8204 // CHECK:   ret <4 x i32> [[SUB_I_I]]
// Unsigned high-half multiply-subtract long: shuffles, umull.v4i32, sub from %a.
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
8208 
8209 // CHECK-LABEL: @test_vmlsl_high_u32(
8210 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8211 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8212 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8213 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8214 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8215 // CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8216 // CHECK:   ret <2 x i64> [[SUB_I_I]]
// Unsigned high-half multiply-subtract long: shuffles, umull.v2i64, sub from %a.
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}
8220 
8221 // CHECK-LABEL: @test_vqdmull_s16(
8222 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8223 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8224 // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
8225 // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
8226 // CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
// Saturating doubling multiply long: @llvm.aarch64.neon.sqdmull.v4i32 plus a <16 x i8> result bitcast.
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
8230 
8231 // CHECK-LABEL: @test_vqdmull_s32(
8232 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8233 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8234 // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
8235 // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
8236 // CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
// Saturating doubling multiply long: @llvm.aarch64.neon.sqdmull.v2i64 plus a <16 x i8> result bitcast.
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}
8240 
8241 // CHECK-LABEL: @test_vqdmlal_s16(
8242 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8243 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8244 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8245 // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
8246 // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
8247 // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Saturating doubling multiply-accumulate long: sqdmull.v4i32(%b, %c) fed into sqadd.v4i32 with %a.
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}
8251 
8252 // CHECK-LABEL: @test_vqdmlal_s32(
8253 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8254 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8255 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8256 // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
8257 // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
8258 // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
test_vqdmlal_s32(int64x2_t a,int32x2_t b,int32x2_t c)8259 int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
8260   return vqdmlal_s32(a, b, c);
8261 }
8262 
// vqdmlsl = sqdmull on b,c followed by saturating subtract from accumulator a.
// CHECK-LABEL: @test_vqdmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}
8284 
// "_high" variants operate on the upper half of 128-bit inputs: clang first
// emits a shufflevector extracting the high lanes, then the same sqdmull call.
// CHECK-LABEL: @test_vqdmull_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I_I]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I_I]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}
8308 
// High-half saturating doubling multiply-accumulate-long: extract high lanes
// of b and c, sqdmull them, then saturating-add into accumulator a.
// CHECK-LABEL: @test_vqdmlal_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}
8334 
// High-half saturating doubling multiply-subtract-long: same shape as the
// vqdmlal_high tests but the final step is sqsub instead of sqadd.
// CHECK-LABEL: @test_vqdmlsl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}
8360 
// Polynomial multiply long over GF(2): maps to the aarch64.neon.pmull intrinsic.
// CHECK-LABEL: @test_vmull_p8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

// CHECK-LABEL: @test_vmull_high_p8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}
8376 
// Scalar 64-bit add/sub intrinsics lower to plain IR add/sub — no target
// intrinsic is needed, and signed/unsigned variants emit identical IR.
// CHECK-LABEL: @test_vaddd_s64(
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}

// CHECK-LABEL: @test_vaddd_u64(
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}

// CHECK-LABEL: @test_vsubd_s64(
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}

// CHECK-LABEL: @test_vsubd_u64(
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}
8404 
// Scalar saturating add. For i8/i16 there is no scalar sqadd/uqadd intrinsic,
// so clang inserts the scalars into lane 0 of a vector, calls the vector
// intrinsic, and extracts lane 0; i32/i64 use the scalar intrinsic directly.
// CHECK-LABEL: @test_vqaddb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}

// CHECK-LABEL: @test_vqaddh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}

// CHECK-LABEL: @test_vqadds_s32(
// CHECK:   [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}

// CHECK-LABEL: @test_vqaddd_s64(
// CHECK:   [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}

// CHECK-LABEL: @test_vqaddb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}

// CHECK-LABEL: @test_vqaddh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}

// CHECK-LABEL: @test_vqadds_u32(
// CHECK:   [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}

// CHECK-LABEL: @test_vqaddd_u64(
// CHECK:   [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}
8472 
// Scalar saturating subtract; same lane-0 insert/extract pattern as vqadd
// for the sub-32-bit element types.
// CHECK-LABEL: @test_vqsubb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: @test_vqsubh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: @test_vqsubs_s32(
// CHECK:   [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: @test_vqsubd_s64(
// CHECK:   [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: @test_vqsubb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: @test_vqsubh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: @test_vqsubs_u32(
// CHECK:   [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: @test_vqsubd_u64(
// CHECK:   [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}
8540 
// Scalar 64-bit variable shift (signed/unsigned shift-left by register).
// CHECK-LABEL: @test_vshld_s64(
// CHECK:   [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: @test_vshld_u64(
// CHECK:   [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}
8554 
// Scalar saturating shift left; i8/i16 widths again go through the lane-0
// insert / vector intrinsic / extract pattern.
// CHECK-LABEL: @test_vqshlb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqshlh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqshls_s32(
// CHECK:   [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: @test_vqshld_s64(
// CHECK:   [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: @test_vqshlb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqshlh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqshls_u32(
// CHECK:   [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: @test_vqshld_u64(
// CHECK:   [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}
8622 
// Scalar 64-bit rounding shift left.
// CHECK-LABEL: @test_vrshld_s64(
// CHECK:   [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}

// CHECK-LABEL: @test_vrshld_u64(
// CHECK:   [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}
8636 
// Scalar saturating rounding shift left; same lowering pattern as vqshl.
// CHECK-LABEL: @test_vqrshlb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqrshls_s32(
// CHECK:   [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: @test_vqrshld_s64(
// CHECK:   [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqrshls_u32(
// CHECK:   [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: @test_vqrshld_u64(
// CHECK:   [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}
8704 
// Pairwise add across a 2-element vector: the i64 case lowers to the uaddv
// reduction intrinsic; the fp cases lower to extract-both-lanes + fadd.
// CHECK-LABEL: @test_vpaddd_s64(
// CHECK:   [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: @test_vpadds_f32(
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: @test_vpaddd_f64(
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}
8729 
// Pairwise floating-point max/min reductions (the *nm variants use the
// IEEE maxNum/minNum NaN semantics); each maps to an f{max,min}[nm]v call.
// CHECK-LABEL: @test_vpmaxnms_f32(
// CHECK:   [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: @test_vpmaxnmqd_f64(
// CHECK:   [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmaxs_f32(
// CHECK:   [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: @test_vpmaxqd_f64(
// CHECK:   [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: @test_vpminnms_f32(
// CHECK:   [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: @test_vpminnmqd_f64(
// CHECK:   [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmins_f32(
// CHECK:   [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: @test_vpminqd_f64(
// CHECK:   [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}
8785 
// Scalar saturating (rounding) doubling multiply high; i16 uses the lane-0
// vector trick, i32 has a direct scalar intrinsic.
// CHECK-LABEL: @test_vqdmulhh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhs_s32(
// CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhs_s32(
// CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}
8819 
// Floating-point multiply extended (FMULX): scalar and 1-lane vector forms.
// CHECK-LABEL: @test_vmulxs_f32(
// CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
// CHECK:   ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: @test_vmulxd_f64(
// CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
// CHECK:   ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: @test_vmulx_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}
8842 
// Newton-Raphson step intrinsics: FRECPS (reciprocal) and FRSQRTS
// (reciprocal square root), scalar f32/f64 forms.
// CHECK-LABEL: @test_vrecpss_f32(
// CHECK:   [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
// CHECK:   ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsd_f64(
// CHECK:   [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
// CHECK:   ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrtss_f32(
// CHECK:   [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
// CHECK:   ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsd_f64(
// CHECK:   [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
// CHECK:   ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}
8870 
8871 // CHECK-LABEL: @test_vcvts_f32_s32(
8872 // CHECK:   [[TMP0:%.*]] = sitofp i32 %a to float
8873 // CHECK:   ret float [[TMP0]]
test_vcvts_f32_s32(int32_t a)8874 float32_t test_vcvts_f32_s32(int32_t a) {
8875   return vcvts_f32_s32(a);
8876 }
8877 
8878 // CHECK-LABEL: @test_vcvtd_f64_s64(
8879 // CHECK:   [[TMP0:%.*]] = sitofp i64 %a to double
8880 // CHECK:   ret double [[TMP0]]
test_vcvtd_f64_s64(int64_t a)8881 float64_t test_vcvtd_f64_s64(int64_t a) {
8882   return vcvtd_f64_s64(a);
8883 }
8884 
8885 // CHECK-LABEL: @test_vcvts_f32_u32(
8886 // CHECK:   [[TMP0:%.*]] = uitofp i32 %a to float
8887 // CHECK:   ret float [[TMP0]]
test_vcvts_f32_u32(uint32_t a)8888 float32_t test_vcvts_f32_u32(uint32_t a) {
8889   return vcvts_f32_u32(a);
8890 }
8891 
8892 // CHECK-LABEL: @test_vcvtd_f64_u64(
8893 // CHECK:   [[TMP0:%.*]] = uitofp i64 %a to double
8894 // CHECK:   ret double [[TMP0]]
test_vcvtd_f64_u64(uint64_t a)8895 float64_t test_vcvtd_f64_u64(uint64_t a) {
8896   return vcvtd_f64_u64(a);
8897 }
8898 
8899 // CHECK-LABEL: @test_vrecpes_f32(
8900 // CHECK:   [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
8901 // CHECK:   ret float [[VRECPES_F32_I]]
test_vrecpes_f32(float32_t a)8902 float32_t test_vrecpes_f32(float32_t a) {
8903   return vrecpes_f32(a);
8904 }
8905 
8906 // CHECK-LABEL: @test_vrecped_f64(
8907 // CHECK:   [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
8908 // CHECK:   ret double [[VRECPED_F64_I]]
test_vrecped_f64(float64_t a)8909 float64_t test_vrecped_f64(float64_t a) {
8910   return vrecped_f64(a);
8911 }
8912 
8913 // CHECK-LABEL: @test_vrecpxs_f32(
8914 // CHECK:   [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
8915 // CHECK:   ret float [[VRECPXS_F32_I]]
test_vrecpxs_f32(float32_t a)8916 float32_t test_vrecpxs_f32(float32_t a) {
8917   return vrecpxs_f32(a);
8918 }
8919 
8920 // CHECK-LABEL: @test_vrecpxd_f64(
8921 // CHECK:   [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
8922 // CHECK:   ret double [[VRECPXD_F64_I]]
test_vrecpxd_f64(float64_t a)8923 float64_t test_vrecpxd_f64(float64_t a) {
8924   return vrecpxd_f64(a);
8925 }
8926 
8927 // CHECK-LABEL: @test_vrsqrte_u32(
8928 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8929 // CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
8930 // CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
test_vrsqrte_u32(uint32x2_t a)8931 uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
8932   return vrsqrte_u32(a);
8933 }
8934 
8935 // CHECK-LABEL: @test_vrsqrteq_u32(
8936 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8937 // CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
8938 // CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
test_vrsqrteq_u32(uint32x4_t a)8939 uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
8940   return vrsqrteq_u32(a);
8941 }
8942 
8943 // CHECK-LABEL: @test_vrsqrtes_f32(
8944 // CHECK:   [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
8945 // CHECK:   ret float [[VRSQRTES_F32_I]]
test_vrsqrtes_f32(float32_t a)8946 float32_t test_vrsqrtes_f32(float32_t a) {
8947   return vrsqrtes_f32(a);
8948 }
8949 
8950 // CHECK-LABEL: @test_vrsqrted_f64(
8951 // CHECK:   [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
8952 // CHECK:   ret double [[VRSQRTED_F64_I]]
test_vrsqrted_f64(float64_t a)8953 float64_t test_vrsqrted_f64(float64_t a) {
8954   return vrsqrted_f64(a);
8955 }
8956 
// ---------------------------------------------------------------------------
// vld1 / vld1q tests: single-vector loads for every element type (u8..u64,
// s8..s64, f16/f32/f64, p8/p16), both 64-bit (vld1_*) and 128-bit (vld1q_*)
// forms.  The CHECK lines show the expected lowering: a pointer bitcast to
// the vector type followed by a plain IR `load` (no target intrinsic).
// Generated FileCheck test — keep the C code byte-identical to the CHECK'd
// expectations; only non-CHECK comments may be edited.
// ---------------------------------------------------------------------------
8957 // CHECK-LABEL: @test_vld1q_u8(
8958 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
8959 // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
8960 // CHECK:   ret <16 x i8> [[TMP1]]
test_vld1q_u8(uint8_t const * a)8961 uint8x16_t test_vld1q_u8(uint8_t const *a) {
8962   return vld1q_u8(a);
8963 }
8964 
8965 // CHECK-LABEL: @test_vld1q_u16(
8966 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
8967 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
8968 // CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
8969 // CHECK:   ret <8 x i16> [[TMP2]]
test_vld1q_u16(uint16_t const * a)8970 uint16x8_t test_vld1q_u16(uint16_t const *a) {
8971   return vld1q_u16(a);
8972 }
8973 
8974 // CHECK-LABEL: @test_vld1q_u32(
8975 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
8976 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
8977 // CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
8978 // CHECK:   ret <4 x i32> [[TMP2]]
test_vld1q_u32(uint32_t const * a)8979 uint32x4_t test_vld1q_u32(uint32_t const *a) {
8980   return vld1q_u32(a);
8981 }
8982 
8983 // CHECK-LABEL: @test_vld1q_u64(
8984 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
8985 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
8986 // CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
8987 // CHECK:   ret <2 x i64> [[TMP2]]
test_vld1q_u64(uint64_t const * a)8988 uint64x2_t test_vld1q_u64(uint64_t const *a) {
8989   return vld1q_u64(a);
8990 }
8991 
8992 // CHECK-LABEL: @test_vld1q_s8(
8993 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
8994 // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
8995 // CHECK:   ret <16 x i8> [[TMP1]]
test_vld1q_s8(int8_t const * a)8996 int8x16_t test_vld1q_s8(int8_t const *a) {
8997   return vld1q_s8(a);
8998 }
8999 
9000 // CHECK-LABEL: @test_vld1q_s16(
9001 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
9002 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9003 // CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9004 // CHECK:   ret <8 x i16> [[TMP2]]
test_vld1q_s16(int16_t const * a)9005 int16x8_t test_vld1q_s16(int16_t const *a) {
9006   return vld1q_s16(a);
9007 }
9008 
9009 // CHECK-LABEL: @test_vld1q_s32(
9010 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
9011 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
9012 // CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
9013 // CHECK:   ret <4 x i32> [[TMP2]]
test_vld1q_s32(int32_t const * a)9014 int32x4_t test_vld1q_s32(int32_t const *a) {
9015   return vld1q_s32(a);
9016 }
9017 
9018 // CHECK-LABEL: @test_vld1q_s64(
9019 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
9020 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
9021 // CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
9022 // CHECK:   ret <2 x i64> [[TMP2]]
test_vld1q_s64(int64_t const * a)9023 int64x2_t test_vld1q_s64(int64_t const *a) {
9024   return vld1q_s64(a);
9025 }
9026 
9027 // CHECK-LABEL: @test_vld1q_f16(
9028 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
9029 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
9030 // CHECK:   [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]]
9031 // CHECK:   ret <8 x half> [[TMP2]]
test_vld1q_f16(float16_t const * a)9032 float16x8_t test_vld1q_f16(float16_t const *a) {
9033   return vld1q_f16(a);
9034 }
9035 
9036 // CHECK-LABEL: @test_vld1q_f32(
9037 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
9038 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
9039 // CHECK:   [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
9040 // CHECK:   ret <4 x float> [[TMP2]]
test_vld1q_f32(float32_t const * a)9041 float32x4_t test_vld1q_f32(float32_t const *a) {
9042   return vld1q_f32(a);
9043 }
9044 
9045 // CHECK-LABEL: @test_vld1q_f64(
9046 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
9047 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
9048 // CHECK:   [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
9049 // CHECK:   ret <2 x double> [[TMP2]]
test_vld1q_f64(float64_t const * a)9050 float64x2_t test_vld1q_f64(float64_t const *a) {
9051   return vld1q_f64(a);
9052 }
9053 
9054 // CHECK-LABEL: @test_vld1q_p8(
9055 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
9056 // CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
9057 // CHECK:   ret <16 x i8> [[TMP1]]
test_vld1q_p8(poly8_t const * a)9058 poly8x16_t test_vld1q_p8(poly8_t const *a) {
9059   return vld1q_p8(a);
9060 }
9061 
9062 // CHECK-LABEL: @test_vld1q_p16(
9063 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
9064 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9065 // CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9066 // CHECK:   ret <8 x i16> [[TMP2]]
test_vld1q_p16(poly16_t const * a)9067 poly16x8_t test_vld1q_p16(poly16_t const *a) {
9068   return vld1q_p16(a);
9069 }
9070 
9071 // CHECK-LABEL: @test_vld1_u8(
9072 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9073 // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9074 // CHECK:   ret <8 x i8> [[TMP1]]
test_vld1_u8(uint8_t const * a)9075 uint8x8_t test_vld1_u8(uint8_t const *a) {
9076   return vld1_u8(a);
9077 }
9078 
9079 // CHECK-LABEL: @test_vld1_u16(
9080 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
9081 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9082 // CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9083 // CHECK:   ret <4 x i16> [[TMP2]]
test_vld1_u16(uint16_t const * a)9084 uint16x4_t test_vld1_u16(uint16_t const *a) {
9085   return vld1_u16(a);
9086 }
9087 
9088 // CHECK-LABEL: @test_vld1_u32(
9089 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
9090 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
9091 // CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
9092 // CHECK:   ret <2 x i32> [[TMP2]]
test_vld1_u32(uint32_t const * a)9093 uint32x2_t test_vld1_u32(uint32_t const *a) {
9094   return vld1_u32(a);
9095 }
9096 
9097 // CHECK-LABEL: @test_vld1_u64(
9098 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
9099 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
9100 // CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
9101 // CHECK:   ret <1 x i64> [[TMP2]]
test_vld1_u64(uint64_t const * a)9102 uint64x1_t test_vld1_u64(uint64_t const *a) {
9103   return vld1_u64(a);
9104 }
9105 
9106 // CHECK-LABEL: @test_vld1_s8(
9107 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9108 // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9109 // CHECK:   ret <8 x i8> [[TMP1]]
test_vld1_s8(int8_t const * a)9110 int8x8_t test_vld1_s8(int8_t const *a) {
9111   return vld1_s8(a);
9112 }
9113 
9114 // CHECK-LABEL: @test_vld1_s16(
9115 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
9116 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9117 // CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9118 // CHECK:   ret <4 x i16> [[TMP2]]
test_vld1_s16(int16_t const * a)9119 int16x4_t test_vld1_s16(int16_t const *a) {
9120   return vld1_s16(a);
9121 }
9122 
9123 // CHECK-LABEL: @test_vld1_s32(
9124 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
9125 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
9126 // CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
9127 // CHECK:   ret <2 x i32> [[TMP2]]
test_vld1_s32(int32_t const * a)9128 int32x2_t test_vld1_s32(int32_t const *a) {
9129   return vld1_s32(a);
9130 }
9131 
9132 // CHECK-LABEL: @test_vld1_s64(
9133 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
9134 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
9135 // CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
9136 // CHECK:   ret <1 x i64> [[TMP2]]
test_vld1_s64(int64_t const * a)9137 int64x1_t test_vld1_s64(int64_t const *a) {
9138   return vld1_s64(a);
9139 }
9140 
9141 // CHECK-LABEL: @test_vld1_f16(
9142 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
9143 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
9144 // CHECK:   [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]]
9145 // CHECK:   ret <4 x half> [[TMP2]]
test_vld1_f16(float16_t const * a)9146 float16x4_t test_vld1_f16(float16_t const *a) {
9147   return vld1_f16(a);
9148 }
9149 
9150 // CHECK-LABEL: @test_vld1_f32(
9151 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
9152 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
9153 // CHECK:   [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
9154 // CHECK:   ret <2 x float> [[TMP2]]
test_vld1_f32(float32_t const * a)9155 float32x2_t test_vld1_f32(float32_t const *a) {
9156   return vld1_f32(a);
9157 }
9158 
9159 // CHECK-LABEL: @test_vld1_f64(
9160 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
9161 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
9162 // CHECK:   [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
9163 // CHECK:   ret <1 x double> [[TMP2]]
test_vld1_f64(float64_t const * a)9164 float64x1_t test_vld1_f64(float64_t const *a) {
9165   return vld1_f64(a);
9166 }
9167 
9168 // CHECK-LABEL: @test_vld1_p8(
9169 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9170 // CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9171 // CHECK:   ret <8 x i8> [[TMP1]]
test_vld1_p8(poly8_t const * a)9172 poly8x8_t test_vld1_p8(poly8_t const *a) {
9173   return vld1_p8(a);
9174 }
9175 
9176 // CHECK-LABEL: @test_vld1_p16(
9177 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
9178 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9179 // CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9180 // CHECK:   ret <4 x i16> [[TMP2]]
test_vld1_p16(poly16_t const * a)9181 poly16x4_t test_vld1_p16(poly16_t const *a) {
9182   return vld1_p16(a);
9183 }
9184 
9185 // CHECK-LABEL: @test_vld2q_u8(
9186 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
9187 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
9188 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9189 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9190 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9191 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9192 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9193 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
9194 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9195 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9196 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
9197 // CHECK:   ret %struct.uint8x16x2_t [[TMP5]]
test_vld2q_u8(uint8_t const * a)9198 uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
9199   return vld2q_u8(a);
9200 }
9201 
9202 // CHECK-LABEL: @test_vld2q_u16(
9203 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
9204 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
9205 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9206 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9207 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9208 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9209 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9210 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9211 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
9212 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9213 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9214 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
9215 // CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
test_vld2q_u16(uint16_t const * a)9216 uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
9217   return vld2q_u16(a);
9218 }
9219 
9220 // CHECK-LABEL: @test_vld2q_u32(
9221 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
9222 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
9223 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9224 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9225 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9226 // CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9227 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9228 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9229 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
9230 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9231 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9232 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
9233 // CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
test_vld2q_u32(uint32_t const * a)9234 uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
9235   return vld2q_u32(a);
9236 }
9237 
9238 // CHECK-LABEL: @test_vld2q_u64(
9239 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
9240 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
9241 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9242 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9243 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9244 // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9245 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9246 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9247 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
9248 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9249 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9250 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
9251 // CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
test_vld2q_u64(uint64_t const * a)9252 uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
9253   return vld2q_u64(a);
9254 }
9255 
9256 // CHECK-LABEL: @test_vld2q_s8(
9257 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
9258 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
9259 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9260 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9261 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9262 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9263 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9264 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
9265 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9266 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9267 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
9268 // CHECK:   ret %struct.int8x16x2_t [[TMP5]]
test_vld2q_s8(int8_t const * a)9269 int8x16x2_t test_vld2q_s8(int8_t const *a) {
9270   return vld2q_s8(a);
9271 }
9272 
9273 // CHECK-LABEL: @test_vld2q_s16(
9274 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
9275 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
9276 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9277 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9278 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9279 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9280 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9281 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9282 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
9283 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9284 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9285 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
9286 // CHECK:   ret %struct.int16x8x2_t [[TMP6]]
test_vld2q_s16(int16_t const * a)9287 int16x8x2_t test_vld2q_s16(int16_t const *a) {
9288   return vld2q_s16(a);
9289 }
9290 
9291 // CHECK-LABEL: @test_vld2q_s32(
9292 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
9293 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
9294 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9295 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9296 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9297 // CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9298 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9299 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9300 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
9301 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9302 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9303 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
9304 // CHECK:   ret %struct.int32x4x2_t [[TMP6]]
test_vld2q_s32(int32_t const * a)9305 int32x4x2_t test_vld2q_s32(int32_t const *a) {
9306   return vld2q_s32(a);
9307 }
9308 
9309 // CHECK-LABEL: @test_vld2q_s64(
9310 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
9311 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
9312 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9313 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9314 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9315 // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9316 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9317 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9318 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
9319 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9320 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9321 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
9322 // CHECK:   ret %struct.int64x2x2_t [[TMP6]]
test_vld2q_s64(int64_t const * a)9323 int64x2x2_t test_vld2q_s64(int64_t const *a) {
9324   return vld2q_s64(a);
9325 }
9326 
9327 // CHECK-LABEL: @test_vld2q_f16(
9328 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
9329 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
9330 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9331 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
9332 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
9333 // CHECK:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* [[TMP2]])
9334 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
9335 // CHECK:   store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
9336 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
9337 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9338 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9339 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
9340 // CHECK:   ret %struct.float16x8x2_t [[TMP6]]
test_vld2q_f16(float16_t const * a)9341 float16x8x2_t test_vld2q_f16(float16_t const *a) {
9342   return vld2q_f16(a);
9343 }
9344 
9345 // CHECK-LABEL: @test_vld2q_f32(
9346 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
9347 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
9348 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9349 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
9350 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
9351 // CHECK:   [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
9352 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
9353 // CHECK:   store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
9354 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
9355 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9356 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9357 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
9358 // CHECK:   ret %struct.float32x4x2_t [[TMP6]]
test_vld2q_f32(float32_t const * a)9359 float32x4x2_t test_vld2q_f32(float32_t const *a) {
9360   return vld2q_f32(a);
9361 }
9362 
9363 // CHECK-LABEL: @test_vld2q_f64(
9364 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
9365 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
9366 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9367 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
9368 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
9369 // CHECK:   [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
9370 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
9371 // CHECK:   store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
9372 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
9373 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9374 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9375 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
9376 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
test_vld2q_f64(float64_t const * a)9377 float64x2x2_t test_vld2q_f64(float64_t const *a) {
9378   return vld2q_f64(a);
9379 }
9380 
9381 // CHECK-LABEL: @test_vld2q_p8(
9382 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
9383 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
9384 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9385 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9386 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9387 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9388 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9389 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
9390 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9391 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9392 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
9393 // CHECK:   ret %struct.poly8x16x2_t [[TMP5]]
test_vld2q_p8(poly8_t const * a)9394 poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
9395   return vld2q_p8(a);
9396 }
9397 
9398 // CHECK-LABEL: @test_vld2q_p16(
9399 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
9400 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
9401 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9402 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9403 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9404 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9405 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9406 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9407 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
9408 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9409 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9410 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
9411 // CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
test_vld2q_p16(poly16_t const * a)9412 poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
9413   return vld2q_p16(a);
9414 }
9415 
9416 // CHECK-LABEL: @test_vld2_u8(
9417 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
9418 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
9419 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
9420 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9421 // CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9422 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9423 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9424 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
9425 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
9426 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9427 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
9428 // CHECK:   ret %struct.uint8x8x2_t [[TMP5]]
test_vld2_u8(uint8_t const * a)9429 uint8x8x2_t test_vld2_u8(uint8_t const *a) {
9430   return vld2_u8(a);
9431 }
9432 
9433 // CHECK-LABEL: @test_vld2_u16(
9434 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
9435 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
9436 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
9437 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9438 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9439 // CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9440 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9441 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9442 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
9443 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
9444 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9445 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
9446 // CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
// Per the CHECK lines above: vld2_u16 must lower to @llvm.aarch64.neon.ld2.v4i16 — keep this exact form.
test_vld2_u16(uint16_t const * a)9447 uint16x4x2_t test_vld2_u16(uint16_t const *a) {
9448   return vld2_u16(a);
9449 }
9450 
9451 // CHECK-LABEL: @test_vld2_u32(
9452 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
9453 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
9454 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
9455 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9456 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
9457 // CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
9458 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
9459 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
9460 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
9461 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
9462 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9463 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
9464 // CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
// Per the CHECK lines above: vld2_u32 must lower to @llvm.aarch64.neon.ld2.v2i32 — keep this exact form.
test_vld2_u32(uint32_t const * a)9465 uint32x2x2_t test_vld2_u32(uint32_t const *a) {
9466   return vld2_u32(a);
9467 }
9468 
9469 // CHECK-LABEL: @test_vld2_u64(
9470 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
9471 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
9472 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
9473 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9474 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
9475 // CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
9476 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
9477 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
9478 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
9479 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
9480 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9481 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
9482 // CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
// Per the CHECK lines above: vld2_u64 must lower to @llvm.aarch64.neon.ld2.v1i64 — keep this exact form.
test_vld2_u64(uint64_t const * a)9483 uint64x1x2_t test_vld2_u64(uint64_t const *a) {
9484   return vld2_u64(a);
9485 }
9486 
9487 // CHECK-LABEL: @test_vld2_s8(
9488 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
9489 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
9490 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
9491 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9492 // CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9493 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9494 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9495 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
9496 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
9497 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9498 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
9499 // CHECK:   ret %struct.int8x8x2_t [[TMP5]]
// Per the CHECK lines above: vld2_s8 must lower to @llvm.aarch64.neon.ld2.v8i8 — keep this exact form.
test_vld2_s8(int8_t const * a)9500 int8x8x2_t test_vld2_s8(int8_t const *a) {
9501   return vld2_s8(a);
9502 }
9503 
9504 // CHECK-LABEL: @test_vld2_s16(
9505 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
9506 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
9507 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
9508 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9509 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9510 // CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9511 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9512 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9513 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
9514 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
9515 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9516 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
9517 // CHECK:   ret %struct.int16x4x2_t [[TMP6]]
// Per the CHECK lines above: vld2_s16 must lower to @llvm.aarch64.neon.ld2.v4i16 — keep this exact form.
test_vld2_s16(int16_t const * a)9518 int16x4x2_t test_vld2_s16(int16_t const *a) {
9519   return vld2_s16(a);
9520 }
9521 
9522 // CHECK-LABEL: @test_vld2_s32(
9523 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
9524 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
9525 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
9526 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9527 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
9528 // CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
9529 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
9530 // CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
9531 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
9532 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
9533 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9534 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
9535 // CHECK:   ret %struct.int32x2x2_t [[TMP6]]
// Per the CHECK lines above: vld2_s32 must lower to @llvm.aarch64.neon.ld2.v2i32 — keep this exact form.
test_vld2_s32(int32_t const * a)9536 int32x2x2_t test_vld2_s32(int32_t const *a) {
9537   return vld2_s32(a);
9538 }
9539 
9540 // CHECK-LABEL: @test_vld2_s64(
9541 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
9542 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
9543 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
9544 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9545 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
9546 // CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
9547 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
9548 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
9549 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
9550 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
9551 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9552 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
9553 // CHECK:   ret %struct.int64x1x2_t [[TMP6]]
// Per the CHECK lines above: vld2_s64 must lower to @llvm.aarch64.neon.ld2.v1i64 — keep this exact form.
test_vld2_s64(int64_t const * a)9554 int64x1x2_t test_vld2_s64(int64_t const *a) {
9555   return vld2_s64(a);
9556 }
9557 
9558 // CHECK-LABEL: @test_vld2_f16(
9559 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
9560 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
9561 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
9562 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
9563 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
9564 // CHECK:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* [[TMP2]])
9565 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
9566 // CHECK:   store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
9567 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
9568 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
9569 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9570 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
9571 // CHECK:   ret %struct.float16x4x2_t [[TMP6]]
// Per the CHECK lines above: vld2_f16 must lower to @llvm.aarch64.neon.ld2.v4f16 — keep this exact form.
test_vld2_f16(float16_t const * a)9572 float16x4x2_t test_vld2_f16(float16_t const *a) {
9573   return vld2_f16(a);
9574 }
9575 
9576 // CHECK-LABEL: @test_vld2_f32(
9577 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
9578 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
9579 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
9580 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
9581 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
9582 // CHECK:   [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
9583 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
9584 // CHECK:   store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
9585 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
9586 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
9587 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9588 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
9589 // CHECK:   ret %struct.float32x2x2_t [[TMP6]]
// Per the CHECK lines above: vld2_f32 must lower to @llvm.aarch64.neon.ld2.v2f32 — keep this exact form.
test_vld2_f32(float32_t const * a)9590 float32x2x2_t test_vld2_f32(float32_t const *a) {
9591   return vld2_f32(a);
9592 }
9593 
9594 // CHECK-LABEL: @test_vld2_f64(
9595 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
9596 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
9597 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
9598 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
9599 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
9600 // CHECK:   [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
9601 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
9602 // CHECK:   store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
9603 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
9604 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
9605 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9606 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
9607 // CHECK:   ret %struct.float64x1x2_t [[TMP6]]
// Per the CHECK lines above: vld2_f64 must lower to @llvm.aarch64.neon.ld2.v1f64 — keep this exact form.
test_vld2_f64(float64_t const * a)9608 float64x1x2_t test_vld2_f64(float64_t const *a) {
9609   return vld2_f64(a);
9610 }
9611 
9612 // CHECK-LABEL: @test_vld2_p8(
9613 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
9614 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
9615 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
9616 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9617 // CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9618 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9619 // CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9620 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
9621 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
9622 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9623 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
9624 // CHECK:   ret %struct.poly8x8x2_t [[TMP5]]
// Per the CHECK lines above: vld2_p8 must lower to @llvm.aarch64.neon.ld2.v8i8 — keep this exact form.
test_vld2_p8(poly8_t const * a)9625 poly8x8x2_t test_vld2_p8(poly8_t const *a) {
9626   return vld2_p8(a);
9627 }
9628 
9629 // CHECK-LABEL: @test_vld2_p16(
9630 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
9631 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
9632 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
9633 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9634 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9635 // CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9636 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9637 // CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9638 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
9639 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
9640 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9641 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
9642 // CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
// Per the CHECK lines above: vld2_p16 must lower to @llvm.aarch64.neon.ld2.v4i16 — keep this exact form.
test_vld2_p16(poly16_t const * a)9643 poly16x4x2_t test_vld2_p16(poly16_t const *a) {
9644   return vld2_p16(a);
9645 }
9646 
9647 // CHECK-LABEL: @test_vld3q_u8(
9648 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
9649 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
9650 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
9651 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9652 // CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9653 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
9654 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
9655 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
9656 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
9657 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
9658 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
9659 // CHECK:   ret %struct.uint8x16x3_t [[TMP5]]
// Per the CHECK lines above: vld3q_u8 must lower to @llvm.aarch64.neon.ld3.v16i8 — keep this exact form.
test_vld3q_u8(uint8_t const * a)9660 uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
9661   return vld3q_u8(a);
9662 }
9663 
9664 // CHECK-LABEL: @test_vld3q_u16(
9665 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
9666 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
9667 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
9668 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9669 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9670 // CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9671 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
9672 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
9673 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
9674 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
9675 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9676 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
9677 // CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_u16 must lower to @llvm.aarch64.neon.ld3.v8i16 — keep this exact form.
test_vld3q_u16(uint16_t const * a)9678 uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
9679   return vld3q_u16(a);
9680 }
9681 
9682 // CHECK-LABEL: @test_vld3q_u32(
9683 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
9684 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
9685 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
9686 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9687 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9688 // CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9689 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
9690 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
9691 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
9692 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
9693 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9694 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
9695 // CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_u32 must lower to @llvm.aarch64.neon.ld3.v4i32 — keep this exact form.
test_vld3q_u32(uint32_t const * a)9696 uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
9697   return vld3q_u32(a);
9698 }
9699 
9700 // CHECK-LABEL: @test_vld3q_u64(
9701 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
9702 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
9703 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
9704 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9705 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9706 // CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9707 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
9708 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
9709 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
9710 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
9711 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9712 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
9713 // CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_u64 must lower to @llvm.aarch64.neon.ld3.v2i64 — keep this exact form.
test_vld3q_u64(uint64_t const * a)9714 uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
9715   return vld3q_u64(a);
9716 }
9717 
9718 // CHECK-LABEL: @test_vld3q_s8(
9719 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
9720 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
9721 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
9722 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9723 // CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9724 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
9725 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
9726 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
9727 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
9728 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
9729 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
9730 // CHECK:   ret %struct.int8x16x3_t [[TMP5]]
// Per the CHECK lines above: vld3q_s8 must lower to @llvm.aarch64.neon.ld3.v16i8 — keep this exact form.
test_vld3q_s8(int8_t const * a)9731 int8x16x3_t test_vld3q_s8(int8_t const *a) {
9732   return vld3q_s8(a);
9733 }
9734 
9735 // CHECK-LABEL: @test_vld3q_s16(
9736 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
9737 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
9738 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
9739 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9740 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9741 // CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9742 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
9743 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
9744 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
9745 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
9746 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9747 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
9748 // CHECK:   ret %struct.int16x8x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_s16 must lower to @llvm.aarch64.neon.ld3.v8i16 — keep this exact form.
test_vld3q_s16(int16_t const * a)9749 int16x8x3_t test_vld3q_s16(int16_t const *a) {
9750   return vld3q_s16(a);
9751 }
9752 
9753 // CHECK-LABEL: @test_vld3q_s32(
9754 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
9755 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
9756 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
9757 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9758 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9759 // CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9760 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
9761 // CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
9762 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
9763 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
9764 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9765 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
9766 // CHECK:   ret %struct.int32x4x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_s32 must lower to @llvm.aarch64.neon.ld3.v4i32 — keep this exact form.
test_vld3q_s32(int32_t const * a)9767 int32x4x3_t test_vld3q_s32(int32_t const *a) {
9768   return vld3q_s32(a);
9769 }
9770 
9771 // CHECK-LABEL: @test_vld3q_s64(
9772 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
9773 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
9774 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
9775 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9776 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9777 // CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9778 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
9779 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
9780 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
9781 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
9782 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9783 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
9784 // CHECK:   ret %struct.int64x2x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_s64 must lower to @llvm.aarch64.neon.ld3.v2i64 — keep this exact form.
test_vld3q_s64(int64_t const * a)9785 int64x2x3_t test_vld3q_s64(int64_t const *a) {
9786   return vld3q_s64(a);
9787 }
9788 
9789 // CHECK-LABEL: @test_vld3q_f16(
9790 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
9791 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
9792 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
9793 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
9794 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
9795 // CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* [[TMP2]])
9796 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
9797 // CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
9798 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
9799 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
9800 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9801 // CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
9802 // CHECK:   ret %struct.float16x8x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_f16 must lower to @llvm.aarch64.neon.ld3.v8f16 — keep this exact form.
test_vld3q_f16(float16_t const * a)9803 float16x8x3_t test_vld3q_f16(float16_t const *a) {
9804   return vld3q_f16(a);
9805 }
9806 
9807 // CHECK-LABEL: @test_vld3q_f32(
9808 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
9809 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
9810 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
9811 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
9812 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
9813 // CHECK:   [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
9814 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
9815 // CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
9816 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
9817 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
9818 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9819 // CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
9820 // CHECK:   ret %struct.float32x4x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_f32 must lower to @llvm.aarch64.neon.ld3.v4f32 — keep this exact form.
test_vld3q_f32(float32_t const * a)9821 float32x4x3_t test_vld3q_f32(float32_t const *a) {
9822   return vld3q_f32(a);
9823 }
9824 
9825 // CHECK-LABEL: @test_vld3q_f64(
9826 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
9827 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
9828 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
9829 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
9830 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
9831 // CHECK:   [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
9832 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
9833 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
9834 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
9835 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
9836 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9837 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
9838 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_f64 must lower to @llvm.aarch64.neon.ld3.v2f64 — keep this exact form.
test_vld3q_f64(float64_t const * a)9839 float64x2x3_t test_vld3q_f64(float64_t const *a) {
9840   return vld3q_f64(a);
9841 }
9842 
9843 // CHECK-LABEL: @test_vld3q_p8(
9844 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
9845 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
9846 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
9847 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9848 // CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9849 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
9850 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
9851 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
9852 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
9853 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
9854 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
9855 // CHECK:   ret %struct.poly8x16x3_t [[TMP5]]
// Per the CHECK lines above: vld3q_p8 must lower to @llvm.aarch64.neon.ld3.v16i8 — keep this exact form.
test_vld3q_p8(poly8_t const * a)9856 poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
9857   return vld3q_p8(a);
9858 }
9859 
9860 // CHECK-LABEL: @test_vld3q_p16(
9861 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
9862 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
9863 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
9864 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9865 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9866 // CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9867 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
9868 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
9869 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
9870 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
9871 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
9872 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
9873 // CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
// Per the CHECK lines above: vld3q_p16 must lower to @llvm.aarch64.neon.ld3.v8i16 — keep this exact form.
test_vld3q_p16(poly16_t const * a)9874 poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
9875   return vld3q_p16(a);
9876 }
9877 
9878 // CHECK-LABEL: @test_vld3_u8(
9879 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
9880 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
9881 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
9882 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9883 // CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9884 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
9885 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
9886 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
9887 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
9888 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
9889 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
9890 // CHECK:   ret %struct.uint8x8x3_t [[TMP5]]
// Per the CHECK lines above: vld3_u8 must lower to @llvm.aarch64.neon.ld3.v8i8 — keep this exact form.
test_vld3_u8(uint8_t const * a)9891 uint8x8x3_t test_vld3_u8(uint8_t const *a) {
9892   return vld3_u8(a);
9893 }
9894 
// CHECK-LABEL: @test_vld3_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
// Checks vld3_u16 lowers to @llvm.aarch64.neon.ld3.v4i16 and the 3x<4 x i16>
// result is returned as a uint16x4x3_t via a 24-byte aggregate copy.
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
  return vld3_u16(a);
}
9912 
// CHECK-LABEL: @test_vld3_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
// Checks vld3_u32 lowers to @llvm.aarch64.neon.ld3.v2i32 and the 3x<2 x i32>
// result is returned as a uint32x2x3_t via a 24-byte aggregate copy.
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
  return vld3_u32(a);
}
9930 
// CHECK-LABEL: @test_vld3_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
// Checks vld3_u64 lowers to @llvm.aarch64.neon.ld3.v1i64 and the 3x<1 x i64>
// result is returned as a uint64x1x3_t via a 24-byte aggregate copy.
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
  return vld3_u64(a);
}
9948 
// CHECK-LABEL: @test_vld3_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x3_t [[TMP5]]
// Checks vld3_s8 lowers to @llvm.aarch64.neon.ld3.v8i8 and the 3x<8 x i8>
// result is returned as an int8x8x3_t via a 24-byte aggregate copy.
int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}
9965 
// CHECK-LABEL: @test_vld3_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
// Checks vld3_s16 lowers to @llvm.aarch64.neon.ld3.v4i16 and the 3x<4 x i16>
// result is returned as an int16x4x3_t via a 24-byte aggregate copy.
int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}
9983 
// CHECK-LABEL: @test_vld3_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
// Checks vld3_s32 lowers to @llvm.aarch64.neon.ld3.v2i32 and the 3x<2 x i32>
// result is returned as an int32x2x3_t via a 24-byte aggregate copy.
int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}
10001 
// CHECK-LABEL: @test_vld3_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
// Checks vld3_s64 lowers to @llvm.aarch64.neon.ld3.v1i64 and the 3x<1 x i64>
// result is returned as an int64x1x3_t via a 24-byte aggregate copy.
int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}
10019 
// CHECK-LABEL: @test_vld3_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
// Checks vld3_f16 lowers to @llvm.aarch64.neon.ld3.v4f16 and the 3x<4 x half>
// result is returned as a float16x4x3_t via a 24-byte aggregate copy.
float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}
10037 
// CHECK-LABEL: @test_vld3_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
// Checks vld3_f32 lowers to @llvm.aarch64.neon.ld3.v2f32 and the 3x<2 x float>
// result is returned as a float32x2x3_t via a 24-byte aggregate copy.
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}
10055 
// CHECK-LABEL: @test_vld3_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
// Checks vld3_f64 (AArch64-only) lowers to @llvm.aarch64.neon.ld3.v1f64 and the
// 3x<1 x double> result is returned as a float64x1x3_t via a 24-byte aggregate copy.
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}
10073 
// CHECK-LABEL: @test_vld3_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x3_t [[TMP5]]
// Checks vld3_p8 lowers to @llvm.aarch64.neon.ld3.v8i8 and the 3x<8 x i8>
// result is returned as a poly8x8x3_t via a 24-byte aggregate copy.
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}
10090 
// CHECK-LABEL: @test_vld3_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
// Checks vld3_p16 lowers to @llvm.aarch64.neon.ld3.v4i16 and the 3x<4 x i16>
// result is returned as a poly16x4x3_t via a 24-byte aggregate copy.
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}
10108 
// CHECK-LABEL: @test_vld4q_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x4_t [[TMP5]]
// Checks vld4q_u8 lowers to @llvm.aarch64.neon.ld4.v16i8 and the 4x<16 x i8>
// result is returned as a uint8x16x4_t via a 64-byte aggregate copy.
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
  return vld4q_u8(a);
}
10125 
// CHECK-LABEL: @test_vld4q_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
// Checks vld4q_u16 lowers to @llvm.aarch64.neon.ld4.v8i16 and the 4x<8 x i16>
// result is returned as a uint16x8x4_t via a 64-byte aggregate copy.
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
  return vld4q_u16(a);
}
10143 
// CHECK-LABEL: @test_vld4q_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
// Checks vld4q_u32 lowers to @llvm.aarch64.neon.ld4.v4i32 and the 4x<4 x i32>
// result is returned as a uint32x4x4_t via a 64-byte aggregate copy.
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
  return vld4q_u32(a);
}
10161 
// CHECK-LABEL: @test_vld4q_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
// Checks vld4q_u64 (AArch64-only) lowers to @llvm.aarch64.neon.ld4.v2i64 and the
// 4x<2 x i64> result is returned as a uint64x2x4_t via a 64-byte aggregate copy.
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
  return vld4q_u64(a);
}
10179 
// CHECK-LABEL: @test_vld4q_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x4_t [[TMP5]]
// Checks vld4q_s8 lowers to @llvm.aarch64.neon.ld4.v16i8 and the 4x<16 x i8>
// result is returned as an int8x16x4_t via a 64-byte aggregate copy.
int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}
10196 
// CHECK-LABEL: @test_vld4q_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x4_t [[TMP6]]
// Checks vld4q_s16 lowers to @llvm.aarch64.neon.ld4.v8i16 and the 4x<8 x i16>
// result is returned as an int16x8x4_t via a 64-byte aggregate copy.
int16x8x4_t test_vld4q_s16(int16_t const *a) {
  return vld4q_s16(a);
}
10214 
// CHECK-LABEL: @test_vld4q_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x4_t [[TMP6]]
// Checks vld4q_s32 lowers to @llvm.aarch64.neon.ld4.v4i32 and the 4x<4 x i32>
// result is returned as an int32x4x4_t via a 64-byte aggregate copy.
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}
10232 
// CHECK-LABEL: @test_vld4q_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x4_t [[TMP6]]
// Checks vld4q_s64 (AArch64-only) lowers to @llvm.aarch64.neon.ld4.v2i64 and the
// 4x<2 x i64> result is returned as an int64x2x4_t via a 64-byte aggregate copy.
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}
10250 
// CHECK-LABEL: @test_vld4q_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
// CHECK:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x4_t [[TMP6]]
// Checks vld4q_f16 lowers to @llvm.aarch64.neon.ld4.v8f16 and the 4x<8 x half>
// result is returned as a float16x8x4_t via a 64-byte aggregate copy.
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}
10268 
// CHECK-LABEL: @test_vld4q_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x4_t [[TMP6]]
// Checks vld4q_f32 lowers to @llvm.aarch64.neon.ld4.v4f32 and the 4x<4 x float>
// result is returned as a float32x4x4_t via a 64-byte aggregate copy.
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}
10286 
// CHECK-LABEL: @test_vld4q_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x4_t [[TMP6]]
// Checks vld4q_f64 (AArch64-only) lowers to @llvm.aarch64.neon.ld4.v2f64 and the
// 4x<2 x double> result is returned as a float64x2x4_t via a 64-byte aggregate copy.
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}
10304 
// CHECK-LABEL: @test_vld4q_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x4_t [[TMP5]]
// Checks vld4q_p8 lowers to @llvm.aarch64.neon.ld4.v16i8 and the 4x<16 x i8>
// result is returned as a poly8x16x4_t via a 64-byte aggregate copy.
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}
10321 
// CHECK-LABEL: @test_vld4q_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
// Checks vld4q_p16 lowers to @llvm.aarch64.neon.ld4.v8i16 and the 4x<8 x i16>
// result is returned as a poly16x8x4_t via a 64-byte aggregate copy.
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}
10339 
10340 // CHECK-LABEL: @test_vld4_u8(
10341 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
10342 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
10343 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10344 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10345 // CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10346 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10347 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10348 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
10349 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10350 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10351 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
10352 // CHECK:   ret %struct.uint8x8x4_t [[TMP5]]
// Codegen test for vld4_u8; CHECK lines above pin the emitted IR.
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}
10356 
10357 // CHECK-LABEL: @test_vld4_u16(
10358 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
10359 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
10360 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10361 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10362 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10363 // CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10364 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10365 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10366 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
10367 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10368 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10369 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
10370 // CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
// Codegen test for vld4_u16; CHECK lines above pin the emitted IR.
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}
10374 
10375 // CHECK-LABEL: @test_vld4_u32(
10376 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
10377 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
10378 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10379 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10380 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10381 // CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10382 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10383 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10384 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
10385 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10386 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10387 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
10388 // CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
// Codegen test for vld4_u32; CHECK lines above pin the emitted IR.
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}
10392 
10393 // CHECK-LABEL: @test_vld4_u64(
10394 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
10395 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
10396 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10397 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10398 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10399 // CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10400 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10401 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10402 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
10403 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10404 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10405 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
10406 // CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
// Codegen test for vld4_u64; CHECK lines above pin the emitted IR.
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}
10410 
10411 // CHECK-LABEL: @test_vld4_s8(
10412 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
10413 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
10414 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10415 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10416 // CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10417 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10418 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10419 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
10420 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10421 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10422 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
10423 // CHECK:   ret %struct.int8x8x4_t [[TMP5]]
// Codegen test for vld4_s8; CHECK lines above pin the emitted IR.
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}
10427 
10428 // CHECK-LABEL: @test_vld4_s16(
10429 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
10430 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
10431 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10432 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10433 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10434 // CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10435 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10436 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10437 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
10438 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10439 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10440 // CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
10441 // CHECK:   ret %struct.int16x4x4_t [[TMP6]]
// Codegen test for vld4_s16; CHECK lines above pin the emitted IR.
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}
10445 
10446 // CHECK-LABEL: @test_vld4_s32(
10447 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
10448 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
10449 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10450 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10451 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10452 // CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10453 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10454 // CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10455 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
10456 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10457 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10458 // CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
10459 // CHECK:   ret %struct.int32x2x4_t [[TMP6]]
// Codegen test for vld4_s32; CHECK lines above pin the emitted IR.
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}
10463 
10464 // CHECK-LABEL: @test_vld4_s64(
10465 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
10466 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
10467 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10468 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10469 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10470 // CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10471 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10472 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10473 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
10474 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10475 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10476 // CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
10477 // CHECK:   ret %struct.int64x1x4_t [[TMP6]]
// Codegen test for vld4_s64; CHECK lines above pin the emitted IR.
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}
10481 
10482 // CHECK-LABEL: @test_vld4_f16(
10483 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
10484 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
10485 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10486 // CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
10487 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
10488 // CHECK:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* [[TMP2]])
10489 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
10490 // CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
10491 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
10492 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10493 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10494 // CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
10495 // CHECK:   ret %struct.float16x4x4_t [[TMP6]]
// Codegen test for vld4_f16 (needs -fallow-half-arguments-and-returns, see RUN
// line); CHECK lines above pin the emitted IR.
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}
10499 
10500 // CHECK-LABEL: @test_vld4_f32(
10501 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
10502 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
10503 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
10504 // CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
10505 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
10506 // CHECK:   [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
10507 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
10508 // CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
10509 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
10510 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
10511 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10512 // CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
10513 // CHECK:   ret %struct.float32x2x4_t [[TMP6]]
// Codegen test for vld4_f32; CHECK lines above pin the emitted IR.
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}
10517 
10518 // CHECK-LABEL: @test_vld4_f64(
10519 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
10520 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
10521 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
10522 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
10523 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
10524 // CHECK:   [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
10525 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
10526 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
10527 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
10528 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
10529 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10530 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
10531 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
// Codegen test for vld4_f64 (AArch64-only intrinsic); CHECK lines above pin
// the emitted IR.
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}
10535 
10536 // CHECK-LABEL: @test_vld4_p8(
10537 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
10538 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
10539 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
10540 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10541 // CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10542 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10543 // CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10544 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
10545 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
10546 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10547 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
10548 // CHECK:   ret %struct.poly8x8x4_t [[TMP5]]
// Codegen test for vld4_p8; CHECK lines above pin the emitted IR.
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}
10552 
10553 // CHECK-LABEL: @test_vld4_p16(
10554 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
10555 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
10556 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
10557 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10558 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10559 // CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10560 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10561 // CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10562 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
10563 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
10564 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10565 // CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
10566 // CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
// Codegen test for vld4_p16; CHECK lines above pin the emitted IR.
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}
10570 
10571 // CHECK-LABEL: @test_vst1q_u8(
10572 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10573 // CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
10574 // CHECK:   ret void
// Codegen test for vst1q_u8: expected to lower to a plain <16 x i8> store
// (see CHECK lines above); parameter names feed the checked %a/%b values.
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}
10578 
10579 // CHECK-LABEL: @test_vst1q_u16(
10580 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10581 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10582 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10583 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10584 // CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10585 // CHECK:   ret void
// Codegen test for vst1q_u16; CHECK lines above pin the emitted IR.
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}
10589 
10590 // CHECK-LABEL: @test_vst1q_u32(
10591 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10592 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10593 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
10594 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10595 // CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
10596 // CHECK:   ret void
// Codegen test for vst1q_u32; CHECK lines above pin the emitted IR.
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}
10600 
10601 // CHECK-LABEL: @test_vst1q_u64(
10602 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10603 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10604 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
10605 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10606 // CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
10607 // CHECK:   ret void
// Codegen test for vst1q_u64; CHECK lines above pin the emitted IR.
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}
10611 
10612 // CHECK-LABEL: @test_vst1q_s8(
10613 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10614 // CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
10615 // CHECK:   ret void
// Codegen test for vst1q_s8; CHECK lines above pin the emitted IR.
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}
10619 
10620 // CHECK-LABEL: @test_vst1q_s16(
10621 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10622 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10623 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10624 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10625 // CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10626 // CHECK:   ret void
// Codegen test for vst1q_s16; CHECK lines above pin the emitted IR.
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}
10630 
10631 // CHECK-LABEL: @test_vst1q_s32(
10632 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10633 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10634 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
10635 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10636 // CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
10637 // CHECK:   ret void
// Codegen test for vst1q_s32; CHECK lines above pin the emitted IR.
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}
10641 
10642 // CHECK-LABEL: @test_vst1q_s64(
10643 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10644 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10645 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
10646 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10647 // CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
10648 // CHECK:   ret void
// Codegen test for vst1q_s64; CHECK lines above pin the emitted IR.
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}
10652 
10653 // CHECK-LABEL: @test_vst1q_f16(
10654 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
10655 // CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
10656 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
10657 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
10658 // CHECK:   store <8 x half> [[TMP3]], <8 x half>* [[TMP2]]
10659 // CHECK:   ret void
// Codegen test for vst1q_f16 (half-precision; enabled by the RUN-line flag
// -fallow-half-arguments-and-returns); CHECK lines above pin the emitted IR.
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}
10663 
10664 // CHECK-LABEL: @test_vst1q_f32(
10665 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
10666 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
10667 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
10668 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
10669 // CHECK:   store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
10670 // CHECK:   ret void
// Codegen test for vst1q_f32; CHECK lines above pin the emitted IR.
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}
10674 
10675 // CHECK-LABEL: @test_vst1q_f64(
10676 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
10677 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
10678 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
10679 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
10680 // CHECK:   store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
10681 // CHECK:   ret void
// Codegen test for vst1q_f64 (AArch64-only); CHECK lines above pin the
// emitted IR.
void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}
10685 
10686 // CHECK-LABEL: @test_vst1q_p8(
10687 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10688 // CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
10689 // CHECK:   ret void
// Codegen test for vst1q_p8; CHECK lines above pin the emitted IR.
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}
10693 
10694 // CHECK-LABEL: @test_vst1q_p16(
10695 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10696 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10697 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10698 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10699 // CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10700 // CHECK:   ret void
// Codegen test for vst1q_p16; CHECK lines above pin the emitted IR.
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}
10704 
10705 // CHECK-LABEL: @test_vst1_u8(
10706 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10707 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
10708 // CHECK:   ret void
// Codegen test for vst1_u8 (64-bit variant): lowers to a plain <8 x i8>
// store per the CHECK lines above.
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
  vst1_u8(a, b);
}
10712 
10713 // CHECK-LABEL: @test_vst1_u16(
10714 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10715 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10716 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10717 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10718 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10719 // CHECK:   ret void
// Codegen test for vst1_u16; CHECK lines above pin the emitted IR.
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
  vst1_u16(a, b);
}
10723 
10724 // CHECK-LABEL: @test_vst1_u32(
10725 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10726 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10727 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10728 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10729 // CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
10730 // CHECK:   ret void
// Codegen test for vst1_u32; CHECK lines above pin the emitted IR.
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
  vst1_u32(a, b);
}
10734 
10735 // CHECK-LABEL: @test_vst1_u64(
10736 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10737 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10738 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10739 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10740 // CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
10741 // CHECK:   ret void
// Codegen test for vst1_u64; CHECK lines above pin the emitted IR.
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
  vst1_u64(a, b);
}
10745 
10746 // CHECK-LABEL: @test_vst1_s8(
10747 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10748 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
10749 // CHECK:   ret void
// Codegen test for vst1_s8; CHECK lines above pin the emitted IR.
void test_vst1_s8(int8_t *a, int8x8_t b) {
  vst1_s8(a, b);
}
10753 
10754 // CHECK-LABEL: @test_vst1_s16(
10755 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10756 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10757 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10758 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10759 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10760 // CHECK:   ret void
// Codegen test for vst1_s16; CHECK lines above pin the emitted IR.
void test_vst1_s16(int16_t *a, int16x4_t b) {
  vst1_s16(a, b);
}
10764 
10765 // CHECK-LABEL: @test_vst1_s32(
10766 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
10767 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10768 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10769 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10770 // CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
10771 // CHECK:   ret void
// Codegen test for vst1_s32; CHECK lines above pin the emitted IR.
void test_vst1_s32(int32_t *a, int32x2_t b) {
  vst1_s32(a, b);
}
10775 
10776 // CHECK-LABEL: @test_vst1_s64(
10777 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
10778 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10779 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10780 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10781 // CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
10782 // CHECK:   ret void
// Codegen test for vst1_s64; CHECK lines above pin the emitted IR.
void test_vst1_s64(int64_t *a, int64x1_t b) {
  vst1_s64(a, b);
}
10786 
10787 // CHECK-LABEL: @test_vst1_f16(
10788 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
10789 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
10790 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
10791 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
10792 // CHECK:   store <4 x half> [[TMP3]], <4 x half>* [[TMP2]]
10793 // CHECK:   ret void
// Codegen test for vst1_f16 (half-precision); CHECK lines above pin the
// emitted IR.
void test_vst1_f16(float16_t *a, float16x4_t b) {
  vst1_f16(a, b);
}
10797 
10798 // CHECK-LABEL: @test_vst1_f32(
10799 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
10800 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
10801 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
10802 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
10803 // CHECK:   store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
10804 // CHECK:   ret void
// Codegen test for vst1_f32; CHECK lines above pin the emitted IR.
void test_vst1_f32(float32_t *a, float32x2_t b) {
  vst1_f32(a, b);
}
10808 
10809 // CHECK-LABEL: @test_vst1_f64(
10810 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
10811 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
10812 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
10813 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
10814 // CHECK:   store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
10815 // CHECK:   ret void
// Codegen test for vst1_f64 (AArch64-only); CHECK lines above pin the
// emitted IR.
void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}
10819 
10820 // CHECK-LABEL: @test_vst1_p8(
10821 // CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10822 // CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
10823 // CHECK:   ret void
// Codegen test for vst1_p8; CHECK lines above pin the emitted IR.
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}
10827 
10828 // CHECK-LABEL: @test_vst1_p16(
10829 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
10830 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10831 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10832 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10833 // CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10834 // CHECK:   ret void
// Codegen test for vst1_p16; CHECK lines above pin the emitted IR.
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}
10838 
10839 // CHECK-LABEL: @test_vst2q_u8(
10840 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
10841 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
10842 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
10843 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10844 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
10845 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
10846 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10847 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10848 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10849 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10850 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10851 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10852 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10853 // CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10854 // CHECK:   ret void
// Codegen test for vst2q_u8 (2-way interleaving store): the aggregate 'b' is
// passed as a [2 x <16 x i8>] coercion and spilled/reloaded per the CHECK
// lines above before the @llvm.aarch64.neon.st2 call. Do not alter the body.
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}
10858 
10859 // CHECK-LABEL: @test_vst2q_u16(
10860 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
10861 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
10862 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
10863 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10864 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
10865 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
10866 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10867 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
10868 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10869 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10870 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10871 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10872 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10873 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10874 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10875 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10876 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10877 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10878 // CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10879 // CHECK:   ret void
// Exercises vst2q_u16: the two <8 x i16> lanes round-trip through <16 x i8>
// bitcasts before the call to @llvm.aarch64.neon.st2.v8i16; `a` is bitcast
// from i16* to i8* (see CHECK lines above).
test_vst2q_u16(uint16_t * a,uint16x8x2_t b)10880 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
10881   vst2q_u16(a, b);
10882 }
10883 
10884 // CHECK-LABEL: @test_vst2q_u32(
10885 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
10886 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
10887 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
10888 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10889 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
10890 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
10891 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10892 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
10893 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10894 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
10895 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
10896 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10897 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10898 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
10899 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
10900 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10901 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10902 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10903 // CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
10904 // CHECK:   ret void
// Exercises vst2q_u32: two <4 x i32> lanes are loaded, bitcast through
// <16 x i8>, and stored via @llvm.aarch64.neon.st2.v4i32 (CHECK lines above).
test_vst2q_u32(uint32_t * a,uint32x4x2_t b)10905 void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
10906   vst2q_u32(a, b);
10907 }
10908 
10909 // CHECK-LABEL: @test_vst2q_u64(
10910 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
10911 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
10912 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
10913 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
10914 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
10915 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
10916 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10917 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
10918 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10919 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
10920 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
10921 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10922 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10923 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
10924 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
10925 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10926 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10927 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10928 // CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
10929 // CHECK:   ret void
// Exercises vst2q_u64: two <2 x i64> lanes are loaded, bitcast through
// <16 x i8>, and stored via @llvm.aarch64.neon.st2.v2i64 (CHECK lines above).
test_vst2q_u64(uint64_t * a,uint64x2x2_t b)10930 void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
10931   vst2q_u64(a, b);
10932 }
10933 
10934 // CHECK-LABEL: @test_vst2q_s8(
10935 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
10936 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
10937 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
10938 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10939 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
10940 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
10941 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10942 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10943 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10944 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10945 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10946 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10947 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10948 // CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10949 // CHECK:   ret void
// Exercises vst2q_s8: identical IR shape to the u8 variant — both <16 x i8>
// lanes go to @llvm.aarch64.neon.st2.v16i8 with %a passed directly.
test_vst2q_s8(int8_t * a,int8x16x2_t b)10950 void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
10951   vst2q_s8(a, b);
10952 }
10953 
10954 // CHECK-LABEL: @test_vst2q_s16(
10955 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
10956 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
10957 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
10958 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10959 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
10960 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
10961 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10962 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
10963 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10964 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10965 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10966 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10967 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10968 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10969 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10970 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10971 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10972 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10973 // CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10974 // CHECK:   ret void
// Exercises vst2q_s16: <8 x i16> lanes round-trip through <16 x i8> bitcasts
// before @llvm.aarch64.neon.st2.v8i16; `a` is bitcast i16* -> i8*.
test_vst2q_s16(int16_t * a,int16x8x2_t b)10975 void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
10976   vst2q_s16(a, b);
10977 }
10978 
10979 // CHECK-LABEL: @test_vst2q_s32(
10980 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
10981 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
10982 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
10983 // CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10984 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
10985 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
10986 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10987 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
10988 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
10989 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
10990 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
10991 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10992 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
10993 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
10994 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
10995 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10996 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10997 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10998 // CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
10999 // CHECK:   ret void
// Exercises vst2q_s32: <4 x i32> lanes round-trip through <16 x i8> bitcasts
// before @llvm.aarch64.neon.st2.v4i32; `a` is bitcast i32* -> i8*.
test_vst2q_s32(int32_t * a,int32x4x2_t b)11000 void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
11001   vst2q_s32(a, b);
11002 }
11003 
11004 // CHECK-LABEL: @test_vst2q_s64(
11005 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
11006 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
11007 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
11008 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
11009 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
11010 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
11011 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11012 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11013 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11014 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
11015 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11016 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11017 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11018 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11019 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11020 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11021 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11022 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11023 // CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
11024 // CHECK:   ret void
// Exercises vst2q_s64: <2 x i64> lanes round-trip through <16 x i8> bitcasts
// before @llvm.aarch64.neon.st2.v2i64; `a` is bitcast i64* -> i8*.
test_vst2q_s64(int64_t * a,int64x2x2_t b)11025 void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
11026   vst2q_s64(a, b);
11027 }
11028 
11029 // CHECK-LABEL: @test_vst2q_f16(
11030 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
11031 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
11032 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
11033 // CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
11034 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
11035 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
11036 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11037 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
11038 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11039 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
11040 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11041 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11042 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11043 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
11044 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11045 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11046 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11047 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11048 // CHECK:   call void @llvm.aarch64.neon.st2.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i8* [[TMP2]])
11049 // CHECK:   ret void
// Exercises vst2q_f16 (needs -fallow-half-arguments-and-returns from the RUN
// line): <8 x half> lanes are bitcast through <16 x i8> and stored via
// @llvm.aarch64.neon.st2.v8f16.
test_vst2q_f16(float16_t * a,float16x8x2_t b)11050 void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
11051   vst2q_f16(a, b);
11052 }
11053 
11054 // CHECK-LABEL: @test_vst2q_f32(
11055 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
11056 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
11057 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
11058 // CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
11059 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
11060 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
11061 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11062 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
11063 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11064 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
11065 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11066 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11067 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11068 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
11069 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11070 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11071 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11072 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11073 // CHECK:   call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
11074 // CHECK:   ret void
// Exercises vst2q_f32: <4 x float> lanes are bitcast through <16 x i8> and
// stored via @llvm.aarch64.neon.st2.v4f32; `a` is bitcast float* -> i8*.
test_vst2q_f32(float32_t * a,float32x4x2_t b)11075 void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
11076   vst2q_f32(a, b);
11077 }
11078 
11079 // CHECK-LABEL: @test_vst2q_f64(
11080 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
11081 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
11082 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
11083 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
11084 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
11085 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
11086 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11087 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
11088 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11089 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
11090 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11091 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11092 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11093 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
11094 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11095 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11096 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11097 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11098 // CHECK:   call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
11099 // CHECK:   ret void
// Exercises vst2q_f64 (AArch64-only type): <2 x double> lanes are bitcast
// through <16 x i8> and stored via @llvm.aarch64.neon.st2.v2f64.
test_vst2q_f64(float64_t * a,float64x2x2_t b)11100 void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
11101   vst2q_f64(a, b);
11102 }
11103 
11104 // CHECK-LABEL: @test_vst2q_p8(
11105 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
11106 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
11107 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
11108 // CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11109 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
11110 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
11111 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11112 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11113 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11114 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11115 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11116 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11117 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11118 // CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11119 // CHECK:   ret void
// Exercises vst2q_p8: polynomial 8-bit variant lowers to the same
// @llvm.aarch64.neon.st2.v16i8 call as the u8/s8 tests, %a passed directly.
test_vst2q_p8(poly8_t * a,poly8x16x2_t b)11120 void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
11121   vst2q_p8(a, b);
11122 }
11123 
11124 // CHECK-LABEL: @test_vst2q_p16(
11125 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
11126 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
11127 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
11128 // CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11129 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
11130 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
11131 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11132 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11133 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11134 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11135 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11136 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11137 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11138 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11139 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11140 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11141 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11142 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11143 // CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11144 // CHECK:   ret void
// Exercises vst2q_p16: polynomial 16-bit variant lowers to
// @llvm.aarch64.neon.st2.v8i16, same shape as the u16/s16 tests.
test_vst2q_p16(poly16_t * a,poly16x8x2_t b)11145 void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
11146   vst2q_p16(a, b);
11147 }
11148 
11149 // CHECK-LABEL: @test_vst2_u8(
11150 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
11151 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
11152 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
11153 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11154 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
11155 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
11156 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11157 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
11158 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11159 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11160 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
11161 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11162 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11163 // CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11164 // CHECK:   ret void
// Exercises the 64-bit (non-q) vst2_u8: struct is 8-byte aligned, 16-byte
// memcpy, and both <8 x i8> lanes go to @llvm.aarch64.neon.st2.v8i8.
test_vst2_u8(uint8_t * a,uint8x8x2_t b)11165 void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
11166   vst2_u8(a, b);
11167 }
11168 
11169 // CHECK-LABEL: @test_vst2_u16(
11170 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
11171 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
11172 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
11173 // CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
11174 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
11175 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
11176 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11177 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11178 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
11179 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
11180 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11181 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11182 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
11183 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11184 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11185 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11186 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11187 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11188 // CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
11189 // CHECK:   ret void
// Exercises vst2_u16: <4 x i16> lanes round-trip through <8 x i8> bitcasts
// before @llvm.aarch64.neon.st2.v4i16; `a` is bitcast i16* -> i8*.
test_vst2_u16(uint16_t * a,uint16x4x2_t b)11190 void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
11191   vst2_u16(a, b);
11192 }
11193 
11194 // CHECK-LABEL: @test_vst2_u32(
11195 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
11196 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
11197 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
11198 // CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
11199 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
11200 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
11201 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11202 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
11203 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
11204 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
11205 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11206 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11207 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
11208 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11209 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11210 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11211 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11212 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11213 // CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
11214 // CHECK:   ret void
// Exercises vst2_u32: <2 x i32> lanes round-trip through <8 x i8> bitcasts
// before @llvm.aarch64.neon.st2.v2i32; `a` is bitcast i32* -> i8*.
test_vst2_u32(uint32_t * a,uint32x2x2_t b)11215 void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
11216   vst2_u32(a, b);
11217 }
11218 
11219 // CHECK-LABEL: @test_vst2_u64(
11220 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
11221 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
11222 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
11223 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
11224 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
11225 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
11226 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11227 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11228 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
11229 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
11230 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11231 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11232 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
11233 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11234 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11235 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11236 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11237 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11238 // CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
11239 // CHECK:   ret void
// Exercises vst2_u64: single-element <1 x i64> lanes still round-trip through
// <8 x i8> bitcasts before @llvm.aarch64.neon.st2.v1i64.
test_vst2_u64(uint64_t * a,uint64x1x2_t b)11240 void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
11241   vst2_u64(a, b);
11242 }
11243 
11244 // CHECK-LABEL: @test_vst2_s8(
11245 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
11246 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
11247 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
11248 // CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11249 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
11250 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
11251 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11252 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
11253 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11254 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11255 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
11256 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11257 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11258 // CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11259 // CHECK:   ret void
// Exercises vst2_s8: identical IR shape to vst2_u8 — both <8 x i8> lanes go
// to @llvm.aarch64.neon.st2.v8i8 with %a passed directly.
test_vst2_s8(int8_t * a,int8x8x2_t b)11260 void test_vst2_s8(int8_t *a, int8x8x2_t b) {
11261   vst2_s8(a, b);
11262 }
11263 
// CHECK-LABEL: @test_vst2_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_s16 lowers to llvm.aarch64.neon.st2.v4i16: the struct is
// copied to a local (__s1), both <4 x i16> fields round-trip through <8 x i8>
// bitcasts, and %a is passed as an i8* pointer.
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}
11288 
// CHECK-LABEL: @test_vst2_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_s32 lowers to llvm.aarch64.neon.st2.v2i32 with the same
// struct-copy + bitcast pattern as the s16 variant.
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}
11313 
// CHECK-LABEL: @test_vst2_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_s64 lowers to llvm.aarch64.neon.st2.v1i64 (single-element
// <1 x i64> vectors still go through the generic st2 intrinsic).
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}
11338 
// CHECK-LABEL: @test_vst2_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   call void @llvm.aarch64.neon.st2.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_f16 lowers to llvm.aarch64.neon.st2.v4f16 with <4 x half>
// operands (exercises the -fallow-half-arguments-and-returns path).
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}
11363 
// CHECK-LABEL: @test_vst2_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_f32 lowers to llvm.aarch64.neon.st2.v2f32.
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}
11388 
// CHECK-LABEL: @test_vst2_f64(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_f64 lowers to llvm.aarch64.neon.st2.v1f64 (AArch64-only
// double-precision variant of vst2).
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}
11413 
// CHECK-LABEL: @test_vst2_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
// Verifies vst2_p8 (polynomial 8-bit) lowers to the same
// llvm.aarch64.neon.st2.v8i8 intrinsic as the signed/unsigned 8-bit variants.
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}
11433 
// CHECK-LABEL: @test_vst2_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_p16 (polynomial 16-bit) lowers to llvm.aarch64.neon.st2.v4i16.
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}
11458 
// CHECK-LABEL: @test_vst3q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
// Verifies vst3q_u8 lowers to llvm.aarch64.neon.st3.v16i8 over all three
// <16 x i8> fields of the quad-register struct (48-byte, align-16 copy).
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}
11481 
// CHECK-LABEL: @test_vst3q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst3q_u16 lowers to llvm.aarch64.neon.st3.v8i16 with the usual
// <16 x i8> bitcast round-trip for each field.
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}
11511 
// CHECK-LABEL: @test_vst3q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst3q_u32 lowers to llvm.aarch64.neon.st3.v4i32.
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}
11541 
// CHECK-LABEL: @test_vst3q_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst3q_u64 lowers to llvm.aarch64.neon.st3.v2i64 (AArch64 supports
// quad-register 64-bit element structure stores).
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
  vst3q_u64(a, b);
}
11571 
// CHECK-LABEL: @test_vst3q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
// Verifies vst3q_s8 lowers to llvm.aarch64.neon.st3.v16i8 (same intrinsic as
// the unsigned variant; signedness does not affect the lowering).
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
  vst3q_s8(a, b);
}
11594 
// CHECK-LABEL: @test_vst3q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst3q_s16 lowers to llvm.aarch64.neon.st3.v8i16.
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
  vst3q_s16(a, b);
}
11624 
// CHECK-LABEL: @test_vst3q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst3q_s32 lowers to llvm.aarch64.neon.st3.v4i32.
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
  vst3q_s32(a, b);
}
11654 
11655 // CHECK-LABEL: @test_vst3q_s64(
11656 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
11657 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
11658 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
11659 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
11660 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
11661 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
11662 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11663 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11664 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11665 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
11666 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11667 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11668 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11669 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11670 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11671 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11672 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11673 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
11674 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
11675 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11676 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11677 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11678 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11679 // CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
11680 // CHECK:   ret void
// Interleaving store of three <2 x i64> vectors; the CHECK lines above verify
// this compiles to a single @llvm.aarch64.neon.st3.v2i64 intrinsic call.
test_vst3q_s64(int64_t * a,int64x2x3_t b)11681 void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
11682   vst3q_s64(a, b);
11683 }
11684 
11685 // CHECK-LABEL: @test_vst3q_f16(
11686 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
11687 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
11688 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
11689 // CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
11690 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
11691 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
11692 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11693 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
11694 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11695 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
11696 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11697 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11698 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11699 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
11700 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11701 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11702 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11703 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
11704 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
11705 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11706 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11707 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11708 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11709 // CHECK:   call void @llvm.aarch64.neon.st3.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i8* [[TMP2]])
11710 // CHECK:   ret void
// Interleaving store of three <8 x half> vectors: the struct argument is
// coerced to [3 x <8 x half>], copied, its elements loaded and bitcast, then
// passed to a single @llvm.aarch64.neon.st3.v8f16 intrinsic call.
test_vst3q_f16(float16_t * a,float16x8x3_t b)11711 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
11712   vst3q_f16(a, b);
11713 }
11714 
11715 // CHECK-LABEL: @test_vst3q_f32(
11716 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
11717 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
11718 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
11719 // CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
11720 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
11721 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
11722 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11723 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
11724 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11725 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
11726 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11727 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11728 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11729 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
11730 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11731 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11732 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11733 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
11734 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
11735 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11736 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11737 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11738 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11739 // CHECK:   call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
11740 // CHECK:   ret void
// Interleaving store of three <4 x float> vectors; verifies a single
// @llvm.aarch64.neon.st3.v4f32 intrinsic call.
test_vst3q_f32(float32_t * a,float32x4x3_t b)11741 void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
11742   vst3q_f32(a, b);
11743 }
11744 
11745 // CHECK-LABEL: @test_vst3q_f64(
11746 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
11747 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
11748 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
11749 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
11750 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
11751 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
11752 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11753 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
11754 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11755 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
11756 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11757 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11758 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11759 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
11760 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11761 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11762 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11763 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
11764 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
11765 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11766 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11767 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11768 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11769 // CHECK:   call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
11770 // CHECK:   ret void
// Interleaving store of three <2 x double> vectors; verifies a single
// @llvm.aarch64.neon.st3.v2f64 intrinsic call.
test_vst3q_f64(float64_t * a,float64x2x3_t b)11771 void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
11772   vst3q_f64(a, b);
11773 }
11774 
11775 // CHECK-LABEL: @test_vst3q_p8(
11776 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
11777 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
11778 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
11779 // CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11780 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
11781 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
11782 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11783 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11784 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11785 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11786 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11787 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11788 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11789 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11790 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11791 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11792 // CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11793 // CHECK:   ret void
// Interleaving store of three <16 x i8> poly vectors; i8 elements need no
// bitcasts, and the pointer %a is passed straight to @llvm.aarch64.neon.st3.v16i8.
test_vst3q_p8(poly8_t * a,poly8x16x3_t b)11794 void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
11795   vst3q_p8(a, b);
11796 }
11797 
11798 // CHECK-LABEL: @test_vst3q_p16(
11799 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
11800 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
11801 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
11802 // CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11803 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
11804 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
11805 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11806 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11807 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11808 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11809 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11810 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11811 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11812 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11813 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11814 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11815 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11816 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11817 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11818 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11819 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11820 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11821 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11822 // CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11823 // CHECK:   ret void
// Interleaving store of three <8 x i16> poly vectors; verifies a single
// @llvm.aarch64.neon.st3.v8i16 intrinsic call.
test_vst3q_p16(poly16_t * a,poly16x8x3_t b)11824 void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
11825   vst3q_p16(a, b);
11826 }
11827 
11828 // CHECK-LABEL: @test_vst3_u8(
11829 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
11830 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
11831 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
11832 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
11833 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
11834 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
11835 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11836 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11837 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
11838 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11839 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11840 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11841 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11842 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11843 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
11844 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
11845 // CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
11846 // CHECK:   ret void
// 64-bit (D-register) variant: three <8 x i8> vectors, no element bitcasts,
// single @llvm.aarch64.neon.st3.v8i8 call with %a passed directly.
test_vst3_u8(uint8_t * a,uint8x8x3_t b)11847 void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
11848   vst3_u8(a, b);
11849 }
11850 
11851 // CHECK-LABEL: @test_vst3_u16(
11852 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
11853 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
11854 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
11855 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
11856 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
11857 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
11858 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11859 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11860 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11861 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
11862 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11863 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11864 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11865 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11866 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11867 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11868 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11869 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
11870 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
11871 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11872 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11873 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11874 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11875 // CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
11876 // CHECK:   ret void
// Interleaving store of three <4 x i16> vectors; verifies a single
// @llvm.aarch64.neon.st3.v4i16 intrinsic call.
test_vst3_u16(uint16_t * a,uint16x4x3_t b)11877 void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
11878   vst3_u16(a, b);
11879 }
11880 
11881 // CHECK-LABEL: @test_vst3_u32(
11882 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
11883 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
11884 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
11885 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
11886 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
11887 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
11888 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11889 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
11890 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11891 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
11892 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11893 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11894 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11895 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11896 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11897 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11898 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
11899 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
11900 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
11901 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
11902 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11903 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11904 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
11905 // CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
11906 // CHECK:   ret void
// Interleaving store of three <2 x i32> vectors; verifies a single
// @llvm.aarch64.neon.st3.v2i32 intrinsic call.
test_vst3_u32(uint32_t * a,uint32x2x3_t b)11907 void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
11908   vst3_u32(a, b);
11909 }
11910 
11911 // CHECK-LABEL: @test_vst3_u64(
11912 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
11913 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
11914 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
11915 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
11916 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
11917 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
11918 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11919 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11920 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11921 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
11922 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11923 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11924 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11925 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11926 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11927 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11928 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
11929 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
11930 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
11931 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
11932 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11933 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11934 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
11935 // CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
11936 // CHECK:   ret void
// Interleaving store of three <1 x i64> vectors; verifies a single
// @llvm.aarch64.neon.st3.v1i64 intrinsic call.
test_vst3_u64(uint64_t * a,uint64x1x3_t b)11937 void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
11938   vst3_u64(a, b);
11939 }
11940 
11941 // CHECK-LABEL: @test_vst3_s8(
11942 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
11943 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
11944 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
11945 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
11946 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
11947 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
11948 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11949 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11950 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
11951 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11952 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11953 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11954 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11955 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
11956 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
11957 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
11958 // CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
11959 // CHECK:   ret void
// Signed i8 variant: identical IR shape to vst3_u8 (same st3.v8i8 intrinsic),
// since element signedness does not affect the store lowering.
test_vst3_s8(int8_t * a,int8x8x3_t b)11960 void test_vst3_s8(int8_t *a, int8x8x3_t b) {
11961   vst3_s8(a, b);
11962 }
11963 
11964 // CHECK-LABEL: @test_vst3_s16(
11965 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
11966 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
11967 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
11968 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
11969 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
11970 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
11971 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11972 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11973 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11974 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
11975 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11976 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11977 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11978 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11979 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11980 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11981 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
11982 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
11983 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
11984 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11985 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11986 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11987 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11988 // CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
11989 // CHECK:   ret void
// Signed i16 variant; verifies a single @llvm.aarch64.neon.st3.v4i16 call.
test_vst3_s16(int16_t * a,int16x4x3_t b)11990 void test_vst3_s16(int16_t *a, int16x4x3_t b) {
11991   vst3_s16(a, b);
11992 }
11993 
11994 // CHECK-LABEL: @test_vst3_s32(
11995 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
11996 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
11997 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
11998 // CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
11999 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
12000 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
12001 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12002 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12003 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12004 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
12005 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12006 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12007 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12008 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12009 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12010 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12011 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12012 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12013 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12014 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12015 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12016 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12017 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12018 // CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
12019 // CHECK:   ret void
// Signed i32 variant; verifies a single @llvm.aarch64.neon.st3.v2i32 call.
test_vst3_s32(int32_t * a,int32x2x3_t b)12020 void test_vst3_s32(int32_t *a, int32x2x3_t b) {
12021   vst3_s32(a, b);
12022 }
12023 
12024 // CHECK-LABEL: @test_vst3_s64(
12025 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
12026 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
12027 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
12028 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
12029 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
12030 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
12031 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12032 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12033 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12034 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
12035 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12036 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12037 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12038 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12039 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12040 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12041 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12042 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12043 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12044 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12045 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12046 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12047 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12048 // CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
12049 // CHECK:   ret void
// Signed i64 variant; verifies a single @llvm.aarch64.neon.st3.v1i64 call.
test_vst3_s64(int64_t * a,int64x1x3_t b)12050 void test_vst3_s64(int64_t *a, int64x1x3_t b) {
12051   vst3_s64(a, b);
12052 }
12053 
12054 // CHECK-LABEL: @test_vst3_f16(
12055 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
12056 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
12057 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
12058 // CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
12059 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
12060 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
12061 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12062 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12063 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12064 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
12065 // CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12066 // CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12067 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12068 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
12069 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
12070 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12071 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12072 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
12073 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
12074 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12075 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12076 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12077 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12078 // CHECK:   call void @llvm.aarch64.neon.st3.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i8* [[TMP2]])
12079 // CHECK:   ret void
// Codegen check for vst3_f16: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st3.v4f16 on a byte pointer.
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}
12083 
12084 // CHECK-LABEL: @test_vst3_f32(
12085 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
12086 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
12087 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
12088 // CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
12089 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
12090 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
12091 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12092 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12093 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12094 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
12095 // CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
12096 // CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12097 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12098 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
12099 // CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
12100 // CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12101 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
12102 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
12103 // CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
12104 // CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12105 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12106 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12107 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12108 // CHECK:   call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
12109 // CHECK:   ret void
// Codegen check for vst3_f32: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st3.v2f32 on a byte pointer.
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}
12113 
12114 // CHECK-LABEL: @test_vst3_f64(
12115 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
12116 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
12117 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
12118 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
12119 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
12120 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
12121 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12122 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12123 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12124 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
12125 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
12126 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12127 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12128 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
12129 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
12130 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12131 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
12132 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
12133 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
12134 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12135 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12136 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12137 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12138 // CHECK:   call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
12139 // CHECK:   ret void
// Codegen check for vst3_f64: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st3.v1f64 on a byte pointer.
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}
12143 
12144 // CHECK-LABEL: @test_vst3_p8(
12145 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
12146 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
12147 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
12148 // CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
12149 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
12150 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
12151 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12152 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12153 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
12154 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12155 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12156 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12157 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12158 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
12159 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12160 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12161 // CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
12162 // CHECK:   ret void
// Codegen check for vst3_p8: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st3.v8i8 directly on %a (no pointer bitcast needed
// for the i8 element type).
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
12166 
12167 // CHECK-LABEL: @test_vst3_p16(
12168 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
12169 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
12170 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
12171 // CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
12172 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
12173 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
12174 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
12175 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12176 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12177 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
12178 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12179 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12180 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12181 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12182 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12183 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12184 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
12185 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12186 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12187 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12188 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12189 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12190 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12191 // CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12192 // CHECK:   ret void
// Codegen check for vst3_p16: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st3.v4i16 on a byte pointer.
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
12196 
12197 // CHECK-LABEL: @test_vst4q_u8(
12198 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
12199 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
12200 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
12201 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12202 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
12203 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
12204 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12205 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12206 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12207 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12208 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12209 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12210 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12211 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12212 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12213 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12214 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12215 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12216 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12217 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12218 // CHECK:   ret void
// Codegen check for vst4q_u8: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st4.v16i8 directly on %a (no pointer bitcast needed
// for the i8 element type).
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
12222 
12223 // CHECK-LABEL: @test_vst4q_u16(
12224 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
12225 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
12226 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
12227 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12228 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
12229 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
12230 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12231 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12232 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12233 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12234 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12235 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12236 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12237 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12238 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12239 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12240 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12241 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12242 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12243 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12244 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12245 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12246 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12247 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12248 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12249 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12250 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12251 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12252 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12253 // CHECK:   ret void
// Codegen check for vst4q_u16: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st4.v8i16 on a byte pointer.
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}
12257 
12258 // CHECK-LABEL: @test_vst4q_u32(
12259 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
12260 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
12261 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
12262 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12263 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
12264 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
12265 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12266 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12267 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12268 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12269 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12270 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12271 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12272 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12273 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12274 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12275 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12276 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12277 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12278 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12279 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12280 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12281 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12282 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12283 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12284 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12285 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12286 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12287 // CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12288 // CHECK:   ret void
// Codegen check for vst4q_u32: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st4.v4i32 on a byte pointer.
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}
12292 
12293 // CHECK-LABEL: @test_vst4q_u64(
12294 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
12295 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
12296 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
12297 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12298 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
12299 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
12300 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12301 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12302 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12303 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12304 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12305 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12306 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12307 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12308 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12309 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12310 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12311 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12312 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12313 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12314 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12315 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12316 // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12317 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12318 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12319 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12320 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12321 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12322 // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12323 // CHECK:   ret void
// Codegen check for vst4q_u64: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st4.v2i64 on a byte pointer.
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}
12327 
12328 // CHECK-LABEL: @test_vst4q_s8(
12329 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
12330 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
12331 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
12332 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12333 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
12334 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
12335 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12336 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12337 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12338 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12339 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12340 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12341 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12342 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12343 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12344 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12345 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12346 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12347 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12348 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12349 // CHECK:   ret void
// Codegen check for vst4q_s8: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st4.v16i8 directly on %a (same IR shape as the
// unsigned variant; signedness is irrelevant to the store).
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}
12353 
12354 // CHECK-LABEL: @test_vst4q_s16(
12355 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
12356 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
12357 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
12358 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12359 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
12360 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
12361 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12362 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12363 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12364 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12365 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12366 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12367 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12368 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12369 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12370 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12371 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12372 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12373 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12374 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12375 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12376 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12377 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12378 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12379 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12380 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12381 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12382 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12383 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12384 // CHECK:   ret void
// Codegen check for vst4q_s16: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st4.v8i16 on a byte pointer.
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}
12388 
12389 // CHECK-LABEL: @test_vst4q_s32(
12390 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
12391 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
12392 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
12393 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12394 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
12395 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
12396 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12397 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12398 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12399 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12400 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12401 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12402 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12403 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12404 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12405 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12406 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12407 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12408 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12409 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12410 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12411 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12412 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12413 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12414 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12415 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12416 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12417 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12418 // CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12419 // CHECK:   ret void
// Codegen check for vst4q_s32: the CHECK lines above pin lowering to
// @llvm.aarch64.neon.st4.v4i32 on a byte pointer.
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}
12423 
12424 // CHECK-LABEL: @test_vst4q_s64(
12425 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
12426 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
12427 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
12428 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12429 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
12430 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
12431 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12432 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12433 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12434 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12435 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12436 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12437 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12438 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12439 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12440 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12441 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12442 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12443 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12444 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12445 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12446 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12447 // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12448 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12449 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12450 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12451 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12452 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12453 // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12454 // CHECK:   ret void
// Exercises vst4q_s64 (4-way interleaved store of 2x i64 vectors); the CHECK
// lines above pin the @llvm.aarch64.neon.st4.v2i64 IR this must lower to.
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}
12458 
12459 // CHECK-LABEL: @test_vst4q_f16(
12460 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
12461 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
12462 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
12463 // CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
12464 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
12465 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
12466 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12467 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12468 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12469 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
12470 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
12471 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
12472 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12473 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
12474 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
12475 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
12476 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12477 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
12478 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
12479 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
12480 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12481 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
12482 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
12483 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
12484 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
12485 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
12486 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
12487 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
12488 // CHECK:   call void @llvm.aarch64.neon.st4.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i8* [[TMP2]])
12489 // CHECK:   ret void
// Exercises vst4q_f16; CHECK lines above pin the expected
// @llvm.aarch64.neon.st4.v8f16 call and the half<->i8 bitcast plumbing.
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}
12493 
12494 // CHECK-LABEL: @test_vst4q_f32(
12495 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
12496 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
12497 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
12498 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
12499 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
12500 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
12501 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12502 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12503 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12504 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
12505 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
12506 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
12507 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12508 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
12509 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12510 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12511 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12512 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
12513 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
12514 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
12515 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12516 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
12517 // CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
12518 // CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
12519 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12520 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12521 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
12522 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
12523 // CHECK:   call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
12524 // CHECK:   ret void
// Exercises vst4q_f32; CHECK lines above pin the expected
// @llvm.aarch64.neon.st4.v4f32 call generated for this intrinsic.
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}
12528 
12529 // CHECK-LABEL: @test_vst4q_f64(
12530 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
12531 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
12532 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
12533 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
12534 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
12535 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
12536 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12537 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12538 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12539 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
12540 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12541 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12542 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12543 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
12544 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12545 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12546 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12547 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
12548 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
12549 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12550 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12551 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
12552 // CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
12553 // CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
12554 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12555 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12556 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12557 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
12558 // CHECK:   call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
12559 // CHECK:   ret void
// Exercises vst4q_f64; CHECK lines above pin the expected
// @llvm.aarch64.neon.st4.v2f64 call generated for this intrinsic.
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}
12563 
12564 // CHECK-LABEL: @test_vst4q_p8(
12565 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
12566 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
12567 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
12568 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12569 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
12570 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
12571 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12572 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12573 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12574 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12575 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12576 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12577 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12578 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12579 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12580 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12581 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12582 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12583 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12584 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12585 // CHECK:   ret void
// Exercises vst4q_p8; since the elements are already i8 vectors, the CHECK
// lines above expect @llvm.aarch64.neon.st4.v16i8 with no bitcasts.
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}
12589 
12590 // CHECK-LABEL: @test_vst4q_p16(
12591 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
12592 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
12593 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
12594 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12595 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
12596 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
12597 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12598 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12599 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12600 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12601 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12602 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12603 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12604 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12605 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12606 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12607 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12608 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12609 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12610 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12611 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12612 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12613 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12614 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12615 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12616 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12617 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12618 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12619 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12620 // CHECK:   ret void
// Exercises vst4q_p16; CHECK lines above pin the expected
// @llvm.aarch64.neon.st4.v8i16 call generated for this intrinsic.
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}
12624 
12625 // CHECK-LABEL: @test_vst4_u8(
12626 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
12627 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
12628 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
12629 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12630 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
12631 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
12632 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12633 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12634 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12635 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12636 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12637 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12638 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12639 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12640 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12641 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12642 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12643 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12644 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12645 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12646 // CHECK:   ret void
// Exercises the 64-bit (D-register) vst4_u8; i8 elements need no bitcasts,
// so the CHECK lines above expect a direct @llvm.aarch64.neon.st4.v8i8 call.
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}
12650 
12651 // CHECK-LABEL: @test_vst4_u16(
12652 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
12653 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
12654 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
12655 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12656 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
12657 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
12658 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12659 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12660 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12661 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12662 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12663 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12664 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12665 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12666 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12667 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12668 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12669 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12670 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12671 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12672 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12673 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12674 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12675 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12676 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12677 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12678 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12679 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12680 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12681 // CHECK:   ret void
// Exercises vst4_u16; CHECK lines above pin the expected
// @llvm.aarch64.neon.st4.v4i16 call generated for this intrinsic.
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
}
12685 
12686 // CHECK-LABEL: @test_vst4_u32(
12687 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
12688 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
12689 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
12690 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12691 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
12692 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
12693 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12694 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12695 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12696 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12697 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12698 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12699 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12700 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12701 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12702 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12703 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12704 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12705 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12706 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12707 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12708 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12709 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12710 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12711 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12712 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12713 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12714 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12715 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12716 // CHECK:   ret void
// Exercises vst4_u32; CHECK lines above pin the expected
// @llvm.aarch64.neon.st4.v2i32 call generated for this intrinsic.
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
}
12720 
12721 // CHECK-LABEL: @test_vst4_u64(
12722 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
12723 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
12724 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
12725 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12726 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
12727 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
12728 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12729 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12730 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12731 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12732 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12733 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12734 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12735 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12736 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12737 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12738 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12739 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12740 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12741 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12742 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12743 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12744 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12745 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12746 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12747 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12748 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12749 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12750 // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12751 // CHECK:   ret void
// Exercises vst4_u64 (single-element <1 x i64> vectors); CHECK lines above
// pin the expected @llvm.aarch64.neon.st4.v1i64 call.
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
}
12755 
12756 // CHECK-LABEL: @test_vst4_s8(
12757 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
12758 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
12759 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
12760 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12761 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
12762 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
12763 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12764 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12765 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12766 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12767 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12768 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12769 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12770 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12771 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12772 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12773 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12774 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12775 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12776 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12777 // CHECK:   ret void
// Exercises vst4_s8; shares the @llvm.aarch64.neon.st4.v8i8 lowering with the
// unsigned variant (the intrinsic is sign-agnostic at the IR level).
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
  vst4_s8(a, b);
}
12781 
12782 // CHECK-LABEL: @test_vst4_s16(
12783 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
12784 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
12785 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
12786 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12787 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
12788 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
12789 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12790 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12791 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12792 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12793 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12794 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12795 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12796 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12797 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12798 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12799 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12800 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12801 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12802 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12803 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12804 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12805 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12806 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12807 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12808 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12809 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12810 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12811 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12812 // CHECK:   ret void
// Exercises vst4_s16; CHECK lines above pin the expected
// @llvm.aarch64.neon.st4.v4i16 call generated for this intrinsic.
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
}
12816 
12817 // CHECK-LABEL: @test_vst4_s32(
12818 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
12819 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
12820 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
12821 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12822 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
12823 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
12824 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12825 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12826 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12827 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12828 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12829 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12830 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12831 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12832 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12833 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12834 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12835 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12836 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12837 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12838 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12839 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12840 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12841 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12842 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12843 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12844 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12845 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12846 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12847 // CHECK:   ret void
test_vst4_s32(int32_t * a,int32x2x4_t b)12848 void test_vst4_s32(int32_t *a, int32x2x4_t b) {
12849   vst4_s32(a, b); // Interleaved 4-way store; the pattern above pins the llvm.aarch64.neon.st4.v2i32 lowering.
12850 }
12851 
12852 // CHECK-LABEL: @test_vst4_s64(
12853 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
12854 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
12855 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
12856 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12857 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
12858 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
12859 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12860 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12861 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12862 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12863 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12864 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12865 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12866 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12867 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12868 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12869 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12870 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12871 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12872 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12873 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12874 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12875 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12876 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12877 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12878 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12879 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12880 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12881 // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12882 // CHECK:   ret void
test_vst4_s64(int64_t * a,int64x1x4_t b)12883 void test_vst4_s64(int64_t *a, int64x1x4_t b) {
12884   vst4_s64(a, b); // Interleaved 4-way store; the pattern above pins the llvm.aarch64.neon.st4.v1i64 lowering.
12885 }
12886 
12887 // CHECK-LABEL: @test_vst4_f16(
12888 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
12889 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
12890 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
12891 // CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
12892 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
12893 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
12894 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12895 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12896 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12897 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
12898 // CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12899 // CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12900 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12901 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
12902 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
12903 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12904 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12905 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
12906 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
12907 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12908 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12909 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
12910 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
12911 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
12912 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12913 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12914 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12915 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
12916 // CHECK:   call void @llvm.aarch64.neon.st4.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i8* [[TMP2]])
12917 // CHECK:   ret void
test_vst4_f16(float16_t * a,float16x4x4_t b)12918 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
12919   vst4_f16(a, b); // Interleaved 4-way store; the pattern above pins the llvm.aarch64.neon.st4.v4f16 lowering.
12920 }
12921 
12922 // CHECK-LABEL: @test_vst4_f32(
12923 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
12924 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
12925 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
12926 // CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
12927 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
12928 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
12929 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12930 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12931 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12932 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
12933 // CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
12934 // CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12935 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12936 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
12937 // CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
12938 // CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12939 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12940 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
12941 // CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
12942 // CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12943 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12944 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
12945 // CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
12946 // CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
12947 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12948 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12949 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12950 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
12951 // CHECK:   call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
12952 // CHECK:   ret void
test_vst4_f32(float32_t * a,float32x2x4_t b)12953 void test_vst4_f32(float32_t *a, float32x2x4_t b) {
12954   vst4_f32(a, b); // Interleaved 4-way store; the pattern above pins the llvm.aarch64.neon.st4.v2f32 lowering.
12955 }
12956 
12957 // CHECK-LABEL: @test_vst4_f64(
12958 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
12959 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
12960 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
12961 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
12962 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
12963 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
12964 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12965 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12966 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12967 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
12968 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
12969 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12970 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12971 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
12972 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
12973 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12974 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12975 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
12976 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
12977 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12978 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12979 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
12980 // CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
12981 // CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
12982 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12983 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12984 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12985 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
12986 // CHECK:   call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
12987 // CHECK:   ret void
test_vst4_f64(float64_t * a,float64x1x4_t b)12988 void test_vst4_f64(float64_t *a, float64x1x4_t b) {
12989   vst4_f64(a, b); // Interleaved 4-way store; the pattern above pins the llvm.aarch64.neon.st4.v1f64 lowering.
12990 }
12991 
12992 // CHECK-LABEL: @test_vst4_p8(
12993 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
12994 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
12995 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
12996 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12997 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
12998 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
12999 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13000 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13001 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13002 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13003 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13004 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13005 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13006 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13007 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13008 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13009 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13010 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13011 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13012 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13013 // CHECK:   ret void
test_vst4_p8(poly8_t * a,poly8x8x4_t b)13014 void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
13015   vst4_p8(a, b); // Interleaved 4-way store; the pattern above pins the llvm.aarch64.neon.st4.v8i8 lowering (no lane bitcasts needed for i8).
13016 }
13017 
13018 // CHECK-LABEL: @test_vst4_p16(
13019 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
13020 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
13021 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
13022 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13023 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
13024 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
13025 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13026 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13027 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13028 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13029 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13030 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13031 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13032 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13033 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13034 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13035 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13036 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13037 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13038 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13039 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13040 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13041 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13042 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13043 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13044 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13045 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13046 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13047 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13048 // CHECK:   ret void
test_vst4_p16(poly16_t * a,poly16x4x4_t b)13049 void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
13050   vst4_p16(a, b); // Interleaved 4-way store; the pattern above pins the llvm.aarch64.neon.st4.v4i16 lowering.
13051 }
13052 
13053 // CHECK-LABEL: @test_vld1q_f64_x2(
13054 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
13055 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
13056 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13057 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13058 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13059 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
13060 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
13061 // CHECK:   store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
13062 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
13063 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13064 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13065 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
13066 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
test_vld1q_f64_x2(float64_t const * a)13067 float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
13068   return vld1q_f64_x2(a); // Consecutive 2-register load; the pattern above pins the llvm.aarch64.neon.ld1x2.v2f64 lowering.
13069 }
13070 
13071 // CHECK-LABEL: @test_vld1q_p64_x2(
13072 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
13073 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
13074 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13075 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13076 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13077 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
13078 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
13079 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
13080 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
13081 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13082 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13083 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
13084 // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
test_vld1q_p64_x2(poly64_t const * a)13085 poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
13086   return vld1q_p64_x2(a); // Consecutive 2-register load; the pattern above pins the llvm.aarch64.neon.ld1x2.v2i64 lowering.
13087 }
13088 
13089 // CHECK-LABEL: @test_vld1_f64_x2(
13090 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
13091 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
13092 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13093 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13094 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13095 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
13096 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
13097 // CHECK:   store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
13098 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
13099 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13100 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13101 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
13102 // CHECK:   ret %struct.float64x1x2_t [[TMP6]]
test_vld1_f64_x2(float64_t const * a)13103 float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
13104   return vld1_f64_x2(a); // Consecutive 2-register load; the pattern above pins the llvm.aarch64.neon.ld1x2.v1f64 lowering.
13105 }
13106 
13107 // CHECK-LABEL: @test_vld1_p64_x2(
13108 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
13109 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
13110 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13111 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13112 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13113 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
13114 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
13115 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
13116 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
13117 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13118 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13119 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
13120 // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
test_vld1_p64_x2(poly64_t const * a)13121 poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
13122   return vld1_p64_x2(a); // Consecutive 2-register load; the pattern above pins the llvm.aarch64.neon.ld1x2.v1i64 lowering.
13123 }
13124 
13125 // CHECK-LABEL: @test_vld1q_f64_x3(
13126 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
13127 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
13128 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13129 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13130 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13131 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
13132 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
13133 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13134 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
13135 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13136 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13137 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
13138 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
test_vld1q_f64_x3(float64_t const * a)13139 float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
13140   return vld1q_f64_x3(a); // Consecutive 3-register load; the pattern above pins the llvm.aarch64.neon.ld1x3.v2f64 lowering.
13141 }
13142 
13143 // CHECK-LABEL: @test_vld1q_p64_x3(
13144 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
13145 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
13146 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13147 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13148 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13149 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
13150 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
13151 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13152 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
13153 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13154 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13155 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
13156 // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
test_vld1q_p64_x3(poly64_t const * a)13157 poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
13158   return vld1q_p64_x3(a); // Consecutive 3-register load; the pattern above pins the llvm.aarch64.neon.ld1x3.v2i64 lowering.
13159 }
13160 
13161 // CHECK-LABEL: @test_vld1_f64_x3(
13162 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
13163 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
13164 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13165 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13166 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13167 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
13168 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
13169 // CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13170 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
13171 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13172 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13173 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
13174 // CHECK:   ret %struct.float64x1x3_t [[TMP6]]
test_vld1_f64_x3(float64_t const * a)13175 float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
13176   return vld1_f64_x3(a); // Consecutive 3-register load; the pattern above pins the llvm.aarch64.neon.ld1x3.v1f64 lowering.
13177 }
13178 
13179 // CHECK-LABEL: @test_vld1_p64_x3(
13180 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
13181 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
13182 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13183 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13184 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13185 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
13186 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
13187 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13188 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
13189 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13190 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13191 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
13192 // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
test_vld1_p64_x3(poly64_t const * a)13193 poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
13194   return vld1_p64_x3(a); // Consecutive 3-register load; the pattern above pins the llvm.aarch64.neon.ld1x3.v1i64 lowering.
13195 }
13196 
13197 // CHECK-LABEL: @test_vld1q_f64_x4(
13198 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
13199 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
13200 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13201 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13202 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13203 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
13204 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
13205 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13206 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
13207 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13208 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13209 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
13210 // CHECK:   ret %struct.float64x2x4_t [[TMP6]]
test_vld1q_f64_x4(float64_t const * a)13211 float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
13212   return vld1q_f64_x4(a); // Consecutive 4-register load; the pattern above pins the llvm.aarch64.neon.ld1x4.v2f64 lowering.
13213 }
13214 
13215 // CHECK-LABEL: @test_vld1q_p64_x4(
13216 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
13217 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
13218 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13219 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13220 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13221 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
13222 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
13223 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13224 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
13225 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13226 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13227 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
13228 // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
test_vld1q_p64_x4(poly64_t const * a)13229 poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
13230   return vld1q_p64_x4(a); // Consecutive 4-register load; the pattern above pins the llvm.aarch64.neon.ld1x4.v2i64 lowering.
13231 }
13232 
13233 // CHECK-LABEL: @test_vld1_f64_x4(
13234 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
13235 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
13236 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13237 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13238 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13239 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
13240 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
13241 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13242 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
13243 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13244 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13245 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
13246 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
// Checks that the 64-bit (non-q) vld1_f64_x4 lowers to
// @llvm.aarch64.neon.ld1x4.v1f64 with <1 x double> lanes (CHECK lines above).
test_vld1_f64_x4(float64_t const * a)13247 float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
13248   return vld1_f64_x4(a);
13249 }
13250 
13251 // CHECK-LABEL: @test_vld1_p64_x4(
13252 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
13253 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
13254 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13255 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13256 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13257 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
13258 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
13259 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13260 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
13261 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13262 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13263 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
13264 // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
// Checks that vld1_p64_x4 lowers to @llvm.aarch64.neon.ld1x4.v1i64
// with <1 x i64> lanes (CHECK lines above).
test_vld1_p64_x4(poly64_t const * a)13265 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
13266   return vld1_p64_x4(a);
13267 }
13268 
13269 // CHECK-LABEL: @test_vst1q_f64_x2(
13270 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
13271 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
13272 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
13273 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
13274 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
13275 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
13276 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
13277 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13278 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
13279 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
13280 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13281 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13282 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
13283 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
13284 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13285 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13286 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13287 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13288 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
13289 // CHECK:   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
13290 // CHECK:   ret void
// Checks that vst1q_f64_x2 lowers to @llvm.aarch64.neon.st1x2.v2f64,
// including the by-value struct coercion and memcpy the CHECK lines expect.
test_vst1q_f64_x2(float64_t * a,float64x2x2_t b)13291 void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
13292   vst1q_f64_x2(a, b);
13293 }
13294 
13295 // CHECK-LABEL: @test_vst1q_p64_x2(
13296 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
13297 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
13298 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
13299 // CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
13300 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
13301 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
13302 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
13303 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13304 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
13305 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
13306 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13307 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13308 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
13309 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13310 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13311 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13312 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13313 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13314 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
13315 // CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
13316 // CHECK:   ret void
// Checks that vst1q_p64_x2 lowers to @llvm.aarch64.neon.st1x2.v2i64
// (poly64 lanes as i64), per the CHECK lines above.
test_vst1q_p64_x2(poly64_t * a,poly64x2x2_t b)13317 void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
13318   vst1q_p64_x2(a, b);
13319 }
13320 
13321 // CHECK-LABEL: @test_vst1_f64_x2(
13322 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
13323 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
13324 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
13325 // CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
13326 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
13327 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
13328 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
13329 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13330 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
13331 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
13332 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13333 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13334 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
13335 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
13336 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13337 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13338 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13339 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13340 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
13341 // CHECK:   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
13342 // CHECK:   ret void
// Checks that the 64-bit vst1_f64_x2 lowers to @llvm.aarch64.neon.st1x2.v1f64
// with <1 x double> operands (CHECK lines above).
test_vst1_f64_x2(float64_t * a,float64x1x2_t b)13343 void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
13344   vst1_f64_x2(a, b);
13345 }
13346 
13347 // CHECK-LABEL: @test_vst1_p64_x2(
13348 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
13349 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
13350 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
13351 // CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
13352 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
13353 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
13354 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
13355 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13356 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
13357 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
13358 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13359 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13360 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
13361 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13362 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13363 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13364 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13365 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13366 // CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
13367 // CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
13368 // CHECK:   ret void
// Checks that vst1_p64_x2 lowers to @llvm.aarch64.neon.st1x2.v1i64
// with <1 x i64> operands (CHECK lines above).
test_vst1_p64_x2(poly64_t * a,poly64x1x2_t b)13369 void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
13370   vst1_p64_x2(a, b);
13371 }
13372 
13373 // CHECK-LABEL: @test_vst1q_f64_x3(
13374 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
13375 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
13376 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
13377 // CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
13378 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
13379 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
13380 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
13381 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13382 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13383 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
13384 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13385 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13386 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13387 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
13388 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13389 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13390 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13391 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
13392 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13393 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13394 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13395 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13396 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13397 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
13398 // CHECK:   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
13399 // CHECK:   ret void
// Checks that vst1q_f64_x3 lowers to @llvm.aarch64.neon.st1x3.v2f64
// with three <2 x double> operands (CHECK lines above).
test_vst1q_f64_x3(float64_t * a,float64x2x3_t b)13400 void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
13401   vst1q_f64_x3(a, b);
13402 }
13403 
13404 // CHECK-LABEL: @test_vst1q_p64_x3(
13405 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
13406 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
13407 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
13408 // CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
13409 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
13410 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
13411 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
13412 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13413 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13414 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
13415 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13416 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13417 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13418 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13419 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13420 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13421 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13422 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13423 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13424 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13425 // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13426 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13427 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13428 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
13429 // CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
13430 // CHECK:   ret void
// Checks that vst1q_p64_x3 lowers to @llvm.aarch64.neon.st1x3.v2i64
// with three <2 x i64> operands (CHECK lines above).
test_vst1q_p64_x3(poly64_t * a,poly64x2x3_t b)13431 void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
13432   vst1q_p64_x3(a, b);
13433 }
13434 
13435 // CHECK-LABEL: @test_vst1_f64_x3(
13436 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
13437 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
13438 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
13439 // CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
13440 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
13441 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
13442 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
13443 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13444 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13445 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
13446 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13447 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13448 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13449 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
13450 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13451 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13452 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13453 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
13454 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13455 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13456 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13457 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13458 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13459 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
13460 // CHECK:   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
13461 // CHECK:   ret void
// Checks that vst1_f64_x3 lowers to @llvm.aarch64.neon.st1x3.v1f64
// with three <1 x double> operands (CHECK lines above).
test_vst1_f64_x3(float64_t * a,float64x1x3_t b)13462 void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
13463   vst1_f64_x3(a, b);
13464 }
13465 
13466 // CHECK-LABEL: @test_vst1_p64_x3(
13467 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
13468 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
13469 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
13470 // CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
13471 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
13472 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
13473 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
13474 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13475 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13476 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
13477 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13478 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13479 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13480 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13481 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13482 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13483 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13484 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13485 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13486 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13487 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13488 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13489 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13490 // CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
13491 // CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
13492 // CHECK:   ret void
// Checks that vst1_p64_x3 lowers to @llvm.aarch64.neon.st1x3.v1i64
// with three <1 x i64> operands (CHECK lines above).
test_vst1_p64_x3(poly64_t * a,poly64x1x3_t b)13493 void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
13494   vst1_p64_x3(a, b);
13495 }
13496 
13497 // CHECK-LABEL: @test_vst1q_f64_x4(
13498 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
13499 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
13500 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
13501 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
13502 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
13503 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
13504 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
13505 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13506 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13507 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
13508 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13509 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13510 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13511 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
13512 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13513 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13514 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13515 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
13516 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13517 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13518 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13519 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
13520 // CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
13521 // CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
13522 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13523 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13524 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13525 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
13526 // CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
13527 // CHECK:   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
13528 // CHECK:   ret void
// Checks that vst1q_f64_x4 lowers to @llvm.aarch64.neon.st1x4.v2f64
// with four <2 x double> operands (CHECK lines above).
test_vst1q_f64_x4(float64_t * a,float64x2x4_t b)13529 void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
13530   vst1q_f64_x4(a, b);
13531 }
13532 
13533 // CHECK-LABEL: @test_vst1q_p64_x4(
13534 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
13535 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
13536 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
13537 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
13538 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
13539 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
13540 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
13541 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13542 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13543 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
13544 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13545 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13546 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13547 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13548 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13549 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13550 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13551 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13552 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13553 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13554 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13555 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
13556 // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
13557 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
13558 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13559 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13560 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13561 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
13562 // CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
13563 // CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
13564 // CHECK:   ret void
// Checks that vst1q_p64_x4 lowers to @llvm.aarch64.neon.st1x4.v2i64
// with four <2 x i64> operands (CHECK lines above).
test_vst1q_p64_x4(poly64_t * a,poly64x2x4_t b)13565 void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
13566   vst1q_p64_x4(a, b);
13567 }
13568 
13569 // CHECK-LABEL: @test_vst1_f64_x4(
13570 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
13571 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
13572 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
13573 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
13574 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
13575 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
13576 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13577 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13578 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13579 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
13580 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13581 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13582 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13583 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
13584 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13585 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13586 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13587 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
13588 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13589 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13590 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13591 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
13592 // CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
13593 // CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
13594 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13595 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13596 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13597 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
13598 // CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
13599 // CHECK:   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
13600 // CHECK:   ret void
// Checks that vst1_f64_x4 lowers to @llvm.aarch64.neon.st1x4.v1f64
// with four <1 x double> operands (CHECK lines above).
test_vst1_f64_x4(float64_t * a,float64x1x4_t b)13601 void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
13602   vst1_f64_x4(a, b);
13603 }
13604 
13605 // CHECK-LABEL: @test_vst1_p64_x4(
13606 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
13607 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
13608 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
13609 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13610 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
13611 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
13612 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13613 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13614 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13615 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13616 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13617 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13618 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13619 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13620 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13621 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13622 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13623 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13624 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13625 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13626 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13627 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13628 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13629 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13630 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13631 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13632 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13633 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13634 // CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
13635 // CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
13636 // CHECK:   ret void
// Poly64 variant of the x4 multi-vector store; lowers to the same
// llvm.aarch64.neon.st1x4 intrinsic on <1 x i64> operands.
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
13640 
13641 // CHECK-LABEL: @test_vceqd_s64(
13642 // CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
13643 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13644 // CHECK:   ret i64 [[VCEQD_I]]
// Scalar compare tests: each vc*d_* intrinsic lowers to a plain icmp/fcmp
// followed by sext i1 -> i64, producing the all-ones/all-zeros NEON mask.
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}
13648 
13649 // CHECK-LABEL: @test_vceqd_u64(
13650 // CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
13651 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13652 // CHECK:   ret i64 [[VCEQD_I]]
// NOTE(review): the result is cast to int64_t although the function returns
// uint64_t; the other unsigned-compare tests in this file cast to uint64_t.
// Harmless (identical bit pattern), but inconsistent -- confirm against
// upstream before normalizing.
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (int64_t)vceqd_u64(a, b);
}
13656 
13657 // CHECK-LABEL: @test_vceqzd_s64(
13658 // CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
13659 // CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
13660 // CHECK:   ret i64 [[VCEQZ_I]]
test_vceqzd_s64(int64_t a)13661 int64_t test_vceqzd_s64(int64_t a) {
13662   return (int64_t)vceqzd_s64(a);
13663 }
13664 
13665 // CHECK-LABEL: @test_vceqzd_u64(
13666 // CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
13667 // CHECK:   [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
13668 // CHECK:   ret i64 [[VCEQZD_I]]
// NOTE(review): parameter and return use int64_t even though this exercises
// the unsigned intrinsic vceqzd_u64; the IR type (i64) and CHECK lines are
// unaffected, but uint64_t would match the sibling tests -- verify upstream.
int64_t test_vceqzd_u64(int64_t a) {
  return (int64_t)vceqzd_u64(a);
}
13672 
13673 // CHECK-LABEL: @test_vcged_s64(
13674 // CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, %b
13675 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13676 // CHECK:   ret i64 [[VCEQD_I]]
test_vcged_s64(int64_t a,int64_t b)13677 int64_t test_vcged_s64(int64_t a, int64_t b) {
13678   return (int64_t)vcged_s64(a, b);
13679 }
13680 
13681 // CHECK-LABEL: @test_vcged_u64(
13682 // CHECK:   [[TMP0:%.*]] = icmp uge i64 %a, %b
13683 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13684 // CHECK:   ret i64 [[VCEQD_I]]
test_vcged_u64(uint64_t a,uint64_t b)13685 uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
13686   return (uint64_t)vcged_u64(a, b);
13687 }
13688 
13689 // CHECK-LABEL: @test_vcgezd_s64(
13690 // CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, 0
13691 // CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13692 // CHECK:   ret i64 [[VCGEZ_I]]
test_vcgezd_s64(int64_t a)13693 int64_t test_vcgezd_s64(int64_t a) {
13694   return (int64_t)vcgezd_s64(a);
13695 }
13696 
13697 // CHECK-LABEL: @test_vcgtd_s64(
13698 // CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, %b
13699 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13700 // CHECK:   ret i64 [[VCEQD_I]]
test_vcgtd_s64(int64_t a,int64_t b)13701 int64_t test_vcgtd_s64(int64_t a, int64_t b) {
13702   return (int64_t)vcgtd_s64(a, b);
13703 }
13704 
13705 // CHECK-LABEL: @test_vcgtd_u64(
13706 // CHECK:   [[TMP0:%.*]] = icmp ugt i64 %a, %b
13707 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13708 // CHECK:   ret i64 [[VCEQD_I]]
test_vcgtd_u64(uint64_t a,uint64_t b)13709 uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
13710   return (uint64_t)vcgtd_u64(a, b);
13711 }
13712 
13713 // CHECK-LABEL: @test_vcgtzd_s64(
13714 // CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, 0
13715 // CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13716 // CHECK:   ret i64 [[VCGTZ_I]]
test_vcgtzd_s64(int64_t a)13717 int64_t test_vcgtzd_s64(int64_t a) {
13718   return (int64_t)vcgtzd_s64(a);
13719 }
13720 
13721 // CHECK-LABEL: @test_vcled_s64(
13722 // CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, %b
13723 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13724 // CHECK:   ret i64 [[VCEQD_I]]
test_vcled_s64(int64_t a,int64_t b)13725 int64_t test_vcled_s64(int64_t a, int64_t b) {
13726   return (int64_t)vcled_s64(a, b);
13727 }
13728 
13729 // CHECK-LABEL: @test_vcled_u64(
13730 // CHECK:   [[TMP0:%.*]] = icmp ule i64 %a, %b
13731 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13732 // CHECK:   ret i64 [[VCEQD_I]]
test_vcled_u64(uint64_t a,uint64_t b)13733 uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
13734   return (uint64_t)vcled_u64(a, b);
13735 }
13736 
13737 // CHECK-LABEL: @test_vclezd_s64(
13738 // CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, 0
13739 // CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13740 // CHECK:   ret i64 [[VCLEZ_I]]
test_vclezd_s64(int64_t a)13741 int64_t test_vclezd_s64(int64_t a) {
13742   return (int64_t)vclezd_s64(a);
13743 }
13744 
13745 // CHECK-LABEL: @test_vcltd_s64(
13746 // CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, %b
13747 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13748 // CHECK:   ret i64 [[VCEQD_I]]
test_vcltd_s64(int64_t a,int64_t b)13749 int64_t test_vcltd_s64(int64_t a, int64_t b) {
13750   return (int64_t)vcltd_s64(a, b);
13751 }
13752 
13753 // CHECK-LABEL: @test_vcltd_u64(
13754 // CHECK:   [[TMP0:%.*]] = icmp ult i64 %a, %b
13755 // CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13756 // CHECK:   ret i64 [[VCEQD_I]]
test_vcltd_u64(uint64_t a,uint64_t b)13757 uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
13758   return (uint64_t)vcltd_u64(a, b);
13759 }
13760 
13761 // CHECK-LABEL: @test_vcltzd_s64(
13762 // CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, 0
13763 // CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13764 // CHECK:   ret i64 [[VCLTZ_I]]
test_vcltzd_s64(int64_t a)13765 int64_t test_vcltzd_s64(int64_t a) {
13766   return (int64_t)vcltzd_s64(a);
13767 }
13768 
13769 // CHECK-LABEL: @test_vtstd_s64(
13770 // CHECK:   [[TMP0:%.*]] = and i64 %a, %b
13771 // CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
13772 // CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
13773 // CHECK:   ret i64 [[VTSTD_I]]
test_vtstd_s64(int64_t a,int64_t b)13774 int64_t test_vtstd_s64(int64_t a, int64_t b) {
13775   return (int64_t)vtstd_s64(a, b);
13776 }
13777 
13778 // CHECK-LABEL: @test_vtstd_u64(
13779 // CHECK:   [[TMP0:%.*]] = and i64 %a, %b
13780 // CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
13781 // CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
13782 // CHECK:   ret i64 [[VTSTD_I]]
test_vtstd_u64(uint64_t a,uint64_t b)13783 uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
13784   return (uint64_t)vtstd_u64(a, b);
13785 }
13786 
13787 // CHECK-LABEL: @test_vabsd_s64(
13788 // CHECK:   [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
13789 // CHECK:   ret i64 [[VABSD_S64_I]]
test_vabsd_s64(int64_t a)13790 int64_t test_vabsd_s64(int64_t a) {
13791   return (int64_t)vabsd_s64(a);
13792 }
13793 
13794 // CHECK-LABEL: @test_vqabsb_s8(
13795 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13796 // CHECK:   [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
13797 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
13798 // CHECK:   ret i8 [[TMP1]]
test_vqabsb_s8(int8_t a)13799 int8_t test_vqabsb_s8(int8_t a) {
13800   return (int8_t)vqabsb_s8(a);
13801 }
13802 
13803 // CHECK-LABEL: @test_vqabsh_s16(
13804 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13805 // CHECK:   [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
13806 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
13807 // CHECK:   ret i16 [[TMP1]]
test_vqabsh_s16(int16_t a)13808 int16_t test_vqabsh_s16(int16_t a) {
13809   return (int16_t)vqabsh_s16(a);
13810 }
13811 
13812 // CHECK-LABEL: @test_vqabss_s32(
13813 // CHECK:   [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
13814 // CHECK:   ret i32 [[VQABSS_S32_I]]
test_vqabss_s32(int32_t a)13815 int32_t test_vqabss_s32(int32_t a) {
13816   return (int32_t)vqabss_s32(a);
13817 }
13818 
13819 // CHECK-LABEL: @test_vqabsd_s64(
13820 // CHECK:   [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
13821 // CHECK:   ret i64 [[VQABSD_S64_I]]
test_vqabsd_s64(int64_t a)13822 int64_t test_vqabsd_s64(int64_t a) {
13823   return (int64_t)vqabsd_s64(a);
13824 }
13825 
13826 // CHECK-LABEL: @test_vnegd_s64(
13827 // CHECK:   [[VNEGD_I:%.*]] = sub i64 0, %a
13828 // CHECK:   ret i64 [[VNEGD_I]]
test_vnegd_s64(int64_t a)13829 int64_t test_vnegd_s64(int64_t a) {
13830   return (int64_t)vnegd_s64(a);
13831 }
13832 
13833 // CHECK-LABEL: @test_vqnegb_s8(
13834 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13835 // CHECK:   [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
13836 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
13837 // CHECK:   ret i8 [[TMP1]]
test_vqnegb_s8(int8_t a)13838 int8_t test_vqnegb_s8(int8_t a) {
13839   return (int8_t)vqnegb_s8(a);
13840 }
13841 
13842 // CHECK-LABEL: @test_vqnegh_s16(
13843 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13844 // CHECK:   [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
13845 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
13846 // CHECK:   ret i16 [[TMP1]]
test_vqnegh_s16(int16_t a)13847 int16_t test_vqnegh_s16(int16_t a) {
13848   return (int16_t)vqnegh_s16(a);
13849 }
13850 
13851 // CHECK-LABEL: @test_vqnegs_s32(
13852 // CHECK:   [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
13853 // CHECK:   ret i32 [[VQNEGS_S32_I]]
test_vqnegs_s32(int32_t a)13854 int32_t test_vqnegs_s32(int32_t a) {
13855   return (int32_t)vqnegs_s32(a);
13856 }
13857 
13858 // CHECK-LABEL: @test_vqnegd_s64(
13859 // CHECK:   [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
13860 // CHECK:   ret i64 [[VQNEGD_S64_I]]
test_vqnegd_s64(int64_t a)13861 int64_t test_vqnegd_s64(int64_t a) {
13862   return (int64_t)vqnegd_s64(a);
13863 }
13864 
13865 // CHECK-LABEL: @test_vuqaddb_s8(
13866 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13867 // CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
13868 // CHECK:   [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13869 // CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
13870 // CHECK:   ret i8 [[TMP2]]
test_vuqaddb_s8(int8_t a,uint8_t b)13871 int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
13872   return (int8_t)vuqaddb_s8(a, b);
13873 }
13874 
13875 // CHECK-LABEL: @test_vuqaddh_s16(
13876 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13877 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13878 // CHECK:   [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13879 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
13880 // CHECK:   ret i16 [[TMP2]]
test_vuqaddh_s16(int16_t a,uint16_t b)13881 int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
13882   return (int16_t)vuqaddh_s16(a, b);
13883 }
13884 
13885 // CHECK-LABEL: @test_vuqadds_s32(
13886 // CHECK:   [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
13887 // CHECK:   ret i32 [[VUQADDS_S32_I]]
test_vuqadds_s32(int32_t a,uint32_t b)13888 int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
13889   return (int32_t)vuqadds_s32(a, b);
13890 }
13891 
13892 // CHECK-LABEL: @test_vuqaddd_s64(
13893 // CHECK:   [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
13894 // CHECK:   ret i64 [[VUQADDD_S64_I]]
test_vuqaddd_s64(int64_t a,uint64_t b)13895 int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
13896   return (int64_t)vuqaddd_s64(a, b);
13897 }
13898 
13899 // CHECK-LABEL: @test_vsqaddb_u8(
13900 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13901 // CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
13902 // CHECK:   [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13903 // CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
13904 // CHECK:   ret i8 [[TMP2]]
test_vsqaddb_u8(uint8_t a,int8_t b)13905 uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
13906   return (uint8_t)vsqaddb_u8(a, b);
13907 }
13908 
13909 // CHECK-LABEL: @test_vsqaddh_u16(
13910 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13911 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13912 // CHECK:   [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13913 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
13914 // CHECK:   ret i16 [[TMP2]]
test_vsqaddh_u16(uint16_t a,int16_t b)13915 uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
13916   return (uint16_t)vsqaddh_u16(a, b);
13917 }
13918 
13919 // CHECK-LABEL: @test_vsqadds_u32(
13920 // CHECK:   [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
13921 // CHECK:   ret i32 [[VSQADDS_U32_I]]
test_vsqadds_u32(uint32_t a,int32_t b)13922 uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
13923   return (uint32_t)vsqadds_u32(a, b);
13924 }
13925 
13926 // CHECK-LABEL: @test_vsqaddd_u64(
13927 // CHECK:   [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
13928 // CHECK:   ret i64 [[VSQADDD_U64_I]]
test_vsqaddd_u64(uint64_t a,int64_t b)13929 uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
13930   return (uint64_t)vsqaddd_u64(a, b);
13931 }
13932 
13933 // CHECK-LABEL: @test_vqdmlalh_s16(
13934 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13935 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
13936 // CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13937 // CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13938 // CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
13939 // CHECK:   ret i32 [[VQDMLXL1_I]]
test_vqdmlalh_s16(int32_t a,int16_t b,int16_t c)13940 int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
13941   return (int32_t)vqdmlalh_s16(a, b, c);
13942 }
13943 
13944 // CHECK-LABEL: @test_vqdmlals_s32(
13945 // CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13946 // CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
13947 // CHECK:   ret i64 [[VQDMLXL1_I]]
test_vqdmlals_s32(int64_t a,int32_t b,int32_t c)13948 int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
13949   return (int64_t)vqdmlals_s32(a, b, c);
13950 }
13951 
13952 // CHECK-LABEL: @test_vqdmlslh_s16(
13953 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13954 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
13955 // CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13956 // CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13957 // CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
13958 // CHECK:   ret i32 [[VQDMLXL1_I]]
test_vqdmlslh_s16(int32_t a,int16_t b,int16_t c)13959 int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
13960   return (int32_t)vqdmlslh_s16(a, b, c);
13961 }
13962 
13963 // CHECK-LABEL: @test_vqdmlsls_s32(
13964 // CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13965 // CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
13966 // CHECK:   ret i64 [[VQDMLXL1_I]]
test_vqdmlsls_s32(int64_t a,int32_t b,int32_t c)13967 int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
13968   return (int64_t)vqdmlsls_s32(a, b, c);
13969 }
13970 
13971 // CHECK-LABEL: @test_vqdmullh_s16(
13972 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13973 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13974 // CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13975 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
13976 // CHECK:   ret i32 [[TMP2]]
test_vqdmullh_s16(int16_t a,int16_t b)13977 int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
13978   return (int32_t)vqdmullh_s16(a, b);
13979 }
13980 
13981 // CHECK-LABEL: @test_vqdmulls_s32(
13982 // CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
13983 // CHECK:   ret i64 [[VQDMULLS_S32_I]]
test_vqdmulls_s32(int32_t a,int32_t b)13984 int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
13985   return (int64_t)vqdmulls_s32(a, b);
13986 }
13987 
13988 // CHECK-LABEL: @test_vqmovunh_s16(
13989 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
13990 // CHECK:   [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
13991 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
13992 // CHECK:   ret i8 [[TMP1]]
// Saturating narrow to unsigned (sqxtun), done through lane 0 of a v8i8 op.
// NOTE(review): vqmovunh_s16 produces an unsigned 8-bit result, yet the test
// uses int8_t; i8 IR type is identical, but uint8_t would be more accurate.
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}
13996 
13997 // CHECK-LABEL: @test_vqmovuns_s32(
13998 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
13999 // CHECK:   [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
14000 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
14001 // CHECK:   ret i16 [[TMP1]]
test_vqmovuns_s32(int32_t a)14002 int16_t test_vqmovuns_s32(int32_t a) {
14003   return (int16_t)vqmovuns_s32(a);
14004 }
14005 
14006 // CHECK-LABEL: @test_vqmovund_s64(
14007 // CHECK:   [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
14008 // CHECK:   ret i32 [[VQMOVUND_S64_I]]
test_vqmovund_s64(int64_t a)14009 int32_t test_vqmovund_s64(int64_t a) {
14010   return (int32_t)vqmovund_s64(a);
14011 }
14012 
14013 // CHECK-LABEL: @test_vqmovnh_s16(
14014 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14015 // CHECK:   [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
14016 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
14017 // CHECK:   ret i8 [[TMP1]]
test_vqmovnh_s16(int16_t a)14018 int8_t test_vqmovnh_s16(int16_t a) {
14019   return (int8_t)vqmovnh_s16(a);
14020 }
14021 
14022 // CHECK-LABEL: @test_vqmovns_s32(
14023 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14024 // CHECK:   [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
14025 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
14026 // CHECK:   ret i16 [[TMP1]]
test_vqmovns_s32(int32_t a)14027 int16_t test_vqmovns_s32(int32_t a) {
14028   return (int16_t)vqmovns_s32(a);
14029 }
14030 
14031 // CHECK-LABEL: @test_vqmovnd_s64(
14032 // CHECK:   [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
14033 // CHECK:   ret i32 [[VQMOVND_S64_I]]
test_vqmovnd_s64(int64_t a)14034 int32_t test_vqmovnd_s64(int64_t a) {
14035   return (int32_t)vqmovnd_s64(a);
14036 }
14037 
14038 // CHECK-LABEL: @test_vqmovnh_u16(
14039 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14040 // CHECK:   [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
14041 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
14042 // CHECK:   ret i8 [[TMP1]]
// NOTE(review): signed types (int16_t/int8_t) are used for the unsigned
// saturating-narrow intrinsic vqmovnh_u16; the lowering (uqxtn) and CHECK
// lines are type-width based, so this is cosmetic -- confirm upstream.
int8_t test_vqmovnh_u16(int16_t a) {
  return (int8_t)vqmovnh_u16(a);
}
14046 
14047 // CHECK-LABEL: @test_vqmovns_u32(
14048 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14049 // CHECK:   [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
14050 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
14051 // CHECK:   ret i16 [[TMP1]]
// NOTE(review): signed types used for the unsigned narrow vqmovns_u32
// (uqxtn lowering); cosmetic only, same i16/i32 IR types -- confirm upstream.
int16_t test_vqmovns_u32(int32_t a) {
  return (int16_t)vqmovns_u32(a);
}
14055 
14056 // CHECK-LABEL: @test_vqmovnd_u64(
14057 // CHECK:   [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
14058 // CHECK:   ret i32 [[VQMOVND_U64_I]]
// NOTE(review): signed types used for the unsigned narrow vqmovnd_u64
// (scalar.uqxtn lowering); cosmetic only -- confirm upstream.
int32_t test_vqmovnd_u64(int64_t a) {
  return (int32_t)vqmovnd_u64(a);
}
14062 
14063 // CHECK-LABEL: @test_vceqs_f32(
14064 // CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, %b
14065 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14066 // CHECK:   ret i32 [[VCMPD_I]]
test_vceqs_f32(float32_t a,float32_t b)14067 uint32_t test_vceqs_f32(float32_t a, float32_t b) {
14068   return (uint32_t)vceqs_f32(a, b);
14069 }
14070 
14071 // CHECK-LABEL: @test_vceqd_f64(
14072 // CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, %b
14073 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14074 // CHECK:   ret i64 [[VCMPD_I]]
test_vceqd_f64(float64_t a,float64_t b)14075 uint64_t test_vceqd_f64(float64_t a, float64_t b) {
14076   return (uint64_t)vceqd_f64(a, b);
14077 }
14078 
14079 // CHECK-LABEL: @test_vceqzs_f32(
14080 // CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
14081 // CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
14082 // CHECK:   ret i32 [[VCEQZ_I]]
test_vceqzs_f32(float32_t a)14083 uint32_t test_vceqzs_f32(float32_t a) {
14084   return (uint32_t)vceqzs_f32(a);
14085 }
14086 
14087 // CHECK-LABEL: @test_vceqzd_f64(
14088 // CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
14089 // CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
14090 // CHECK:   ret i64 [[VCEQZ_I]]
test_vceqzd_f64(float64_t a)14091 uint64_t test_vceqzd_f64(float64_t a) {
14092   return (uint64_t)vceqzd_f64(a);
14093 }
14094 
14095 // CHECK-LABEL: @test_vcges_f32(
14096 // CHECK:   [[TMP0:%.*]] = fcmp oge float %a, %b
14097 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14098 // CHECK:   ret i32 [[VCMPD_I]]
test_vcges_f32(float32_t a,float32_t b)14099 uint32_t test_vcges_f32(float32_t a, float32_t b) {
14100   return (uint32_t)vcges_f32(a, b);
14101 }
14102 
14103 // CHECK-LABEL: @test_vcged_f64(
14104 // CHECK:   [[TMP0:%.*]] = fcmp oge double %a, %b
14105 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14106 // CHECK:   ret i64 [[VCMPD_I]]
test_vcged_f64(float64_t a,float64_t b)14107 uint64_t test_vcged_f64(float64_t a, float64_t b) {
14108   return (uint64_t)vcged_f64(a, b);
14109 }
14110 
14111 // CHECK-LABEL: @test_vcgezs_f32(
14112 // CHECK:   [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
14113 // CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
14114 // CHECK:   ret i32 [[VCGEZ_I]]
test_vcgezs_f32(float32_t a)14115 uint32_t test_vcgezs_f32(float32_t a) {
14116   return (uint32_t)vcgezs_f32(a);
14117 }
14118 
14119 // CHECK-LABEL: @test_vcgezd_f64(
14120 // CHECK:   [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
14121 // CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
14122 // CHECK:   ret i64 [[VCGEZ_I]]
test_vcgezd_f64(float64_t a)14123 uint64_t test_vcgezd_f64(float64_t a) {
14124   return (uint64_t)vcgezd_f64(a);
14125 }
14126 
14127 // CHECK-LABEL: @test_vcgts_f32(
14128 // CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, %b
14129 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14130 // CHECK:   ret i32 [[VCMPD_I]]
test_vcgts_f32(float32_t a,float32_t b)14131 uint32_t test_vcgts_f32(float32_t a, float32_t b) {
14132   return (uint32_t)vcgts_f32(a, b);
14133 }
14134 
14135 // CHECK-LABEL: @test_vcgtd_f64(
14136 // CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, %b
14137 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14138 // CHECK:   ret i64 [[VCMPD_I]]
test_vcgtd_f64(float64_t a,float64_t b)14139 uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
14140   return (uint64_t)vcgtd_f64(a, b);
14141 }
14142 
14143 // CHECK-LABEL: @test_vcgtzs_f32(
14144 // CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
14145 // CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
14146 // CHECK:   ret i32 [[VCGTZ_I]]
test_vcgtzs_f32(float32_t a)14147 uint32_t test_vcgtzs_f32(float32_t a) {
14148   return (uint32_t)vcgtzs_f32(a);
14149 }
14150 
14151 // CHECK-LABEL: @test_vcgtzd_f64(
14152 // CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
14153 // CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
14154 // CHECK:   ret i64 [[VCGTZ_I]]
test_vcgtzd_f64(float64_t a)14155 uint64_t test_vcgtzd_f64(float64_t a) {
14156   return (uint64_t)vcgtzd_f64(a);
14157 }
14158 
14159 // CHECK-LABEL: @test_vcles_f32(
14160 // CHECK:   [[TMP0:%.*]] = fcmp ole float %a, %b
14161 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14162 // CHECK:   ret i32 [[VCMPD_I]]
test_vcles_f32(float32_t a,float32_t b)14163 uint32_t test_vcles_f32(float32_t a, float32_t b) {
14164   return (uint32_t)vcles_f32(a, b);
14165 }
14166 
14167 // CHECK-LABEL: @test_vcled_f64(
14168 // CHECK:   [[TMP0:%.*]] = fcmp ole double %a, %b
14169 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14170 // CHECK:   ret i64 [[VCMPD_I]]
test_vcled_f64(float64_t a,float64_t b)14171 uint64_t test_vcled_f64(float64_t a, float64_t b) {
14172   return (uint64_t)vcled_f64(a, b);
14173 }
14174 
14175 // CHECK-LABEL: @test_vclezs_f32(
14176 // CHECK:   [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
14177 // CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
14178 // CHECK:   ret i32 [[VCLEZ_I]]
test_vclezs_f32(float32_t a)14179 uint32_t test_vclezs_f32(float32_t a) {
14180   return (uint32_t)vclezs_f32(a);
14181 }
14182 
14183 // CHECK-LABEL: @test_vclezd_f64(
14184 // CHECK:   [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
14185 // CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
14186 // CHECK:   ret i64 [[VCLEZ_I]]
test_vclezd_f64(float64_t a)14187 uint64_t test_vclezd_f64(float64_t a) {
14188   return (uint64_t)vclezd_f64(a);
14189 }
14190 
14191 // CHECK-LABEL: @test_vclts_f32(
14192 // CHECK:   [[TMP0:%.*]] = fcmp olt float %a, %b
14193 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14194 // CHECK:   ret i32 [[VCMPD_I]]
test_vclts_f32(float32_t a,float32_t b)14195 uint32_t test_vclts_f32(float32_t a, float32_t b) {
14196   return (uint32_t)vclts_f32(a, b);
14197 }
14198 
14199 // CHECK-LABEL: @test_vcltd_f64(
14200 // CHECK:   [[TMP0:%.*]] = fcmp olt double %a, %b
14201 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14202 // CHECK:   ret i64 [[VCMPD_I]]
test_vcltd_f64(float64_t a,float64_t b)14203 uint64_t test_vcltd_f64(float64_t a, float64_t b) {
14204   return (uint64_t)vcltd_f64(a, b);
14205 }
14206 
14207 // CHECK-LABEL: @test_vcltzs_f32(
14208 // CHECK:   [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
14209 // CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
14210 // CHECK:   ret i32 [[VCLTZ_I]]
test_vcltzs_f32(float32_t a)14211 uint32_t test_vcltzs_f32(float32_t a) {
14212   return (uint32_t)vcltzs_f32(a);
14213 }
14214 
14215 // CHECK-LABEL: @test_vcltzd_f64(
14216 // CHECK:   [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
14217 // CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
14218 // CHECK:   ret i64 [[VCLTZ_I]]
test_vcltzd_f64(float64_t a)14219 uint64_t test_vcltzd_f64(float64_t a) {
14220   return (uint64_t)vcltzd_f64(a);
14221 }
14222 
14223 // CHECK-LABEL: @test_vcages_f32(
14224 // CHECK:   [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
14225 // CHECK:   ret i32 [[VCAGES_F32_I]]
test_vcages_f32(float32_t a,float32_t b)14226 uint32_t test_vcages_f32(float32_t a, float32_t b) {
14227   return (uint32_t)vcages_f32(a, b);
14228 }
14229 
14230 // CHECK-LABEL: @test_vcaged_f64(
14231 // CHECK:   [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
14232 // CHECK:   ret i64 [[VCAGED_F64_I]]
test_vcaged_f64(float64_t a,float64_t b)14233 uint64_t test_vcaged_f64(float64_t a, float64_t b) {
14234   return (uint64_t)vcaged_f64(a, b);
14235 }
14236 
14237 // CHECK-LABEL: @test_vcagts_f32(
14238 // CHECK:   [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
14239 // CHECK:   ret i32 [[VCAGTS_F32_I]]
test_vcagts_f32(float32_t a,float32_t b)14240 uint32_t test_vcagts_f32(float32_t a, float32_t b) {
14241   return (uint32_t)vcagts_f32(a, b);
14242 }
14243 
14244 // CHECK-LABEL: @test_vcagtd_f64(
14245 // CHECK:   [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
14246 // CHECK:   ret i64 [[VCAGTD_F64_I]]
test_vcagtd_f64(float64_t a,float64_t b)14247 uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
14248   return (uint64_t)vcagtd_f64(a, b);
14249 }
14250 
14251 // CHECK-LABEL: @test_vcales_f32(
14252 // CHECK:   [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
14253 // CHECK:   ret i32 [[VCALES_F32_I]]
test_vcales_f32(float32_t a,float32_t b)14254 uint32_t test_vcales_f32(float32_t a, float32_t b) {
14255   return (uint32_t)vcales_f32(a, b);
14256 }
14257 
14258 // CHECK-LABEL: @test_vcaled_f64(
14259 // CHECK:   [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
14260 // CHECK:   ret i64 [[VCALED_F64_I]]
test_vcaled_f64(float64_t a,float64_t b)14261 uint64_t test_vcaled_f64(float64_t a, float64_t b) {
14262   return (uint64_t)vcaled_f64(a, b);
14263 }
14264 
14265 // CHECK-LABEL: @test_vcalts_f32(
14266 // CHECK:   [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
14267 // CHECK:   ret i32 [[VCALTS_F32_I]]
test_vcalts_f32(float32_t a,float32_t b)14268 uint32_t test_vcalts_f32(float32_t a, float32_t b) {
14269   return (uint32_t)vcalts_f32(a, b);
14270 }
14271 
14272 // CHECK-LABEL: @test_vcaltd_f64(
14273 // CHECK:   [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
14274 // CHECK:   ret i64 [[VCALTD_F64_I]]
test_vcaltd_f64(float64_t a,float64_t b)14275 uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
14276   return (uint64_t)vcaltd_f64(a, b);
14277 }
14278 
14279 // CHECK-LABEL: @test_vshrd_n_s64(
14280 // CHECK:   [[SHRD_N:%.*]] = ashr i64 %a, 1
14281 // CHECK:   ret i64 [[SHRD_N]]
// Scalar signed shift-right by immediate lowers to a plain 'ashr i64' --
// no intrinsic call needed (see CHECK above).
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}
14285 
14286 // CHECK-LABEL: @test_vshr_n_s64(
14287 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14288 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14289 // CHECK:   [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
14290 // CHECK:   ret <1 x i64> [[VSHR_N]]
test_vshr_n_s64(int64x1_t a)14291 int64x1_t test_vshr_n_s64(int64x1_t a) {
14292   return vshr_n_s64(a, 1);
14293 }
14294 
14295 // CHECK-LABEL: @test_vshrd_n_u64(
14296 // CHECK:   ret i64 0
test_vshrd_n_u64(uint64_t a)14297 uint64_t test_vshrd_n_u64(uint64_t a) {
14298   return (uint64_t)vshrd_n_u64(a, 64);
14299 }
14300 
14301 // CHECK-LABEL: @test_vshrd_n_u64_2(
14302 // CHECK:   ret i64 0
test_vshrd_n_u64_2()14303 uint64_t test_vshrd_n_u64_2() {
14304   uint64_t a = UINT64_C(0xf000000000000000);
14305   return vshrd_n_u64(a, 64);
14306 }
14307 
14308 // CHECK-LABEL: @test_vshr_n_u64(
14309 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14310 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14311 // CHECK:   [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
14312 // CHECK:   ret <1 x i64> [[VSHR_N]]
test_vshr_n_u64(uint64x1_t a)14313 uint64x1_t test_vshr_n_u64(uint64x1_t a) {
14314   return vshr_n_u64(a, 1);
14315 }
14316 
// Rounding shift right by immediate: lowers to the srshl/urshl (rounding
// shift LEFT) intrinsics with the shift amount negated — a right shift by N
// becomes a rounding left shift by -N, as the CHECK lines show.

// CHECK-LABEL: @test_vrshrd_n_s64(
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrd_n_u64(
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}
14348 
// Shift right and accumulate: vsra*(a, b, n) computes a + (b >> n), emitted
// as an ashr/lshr of %b followed by an add with %a.

// CHECK-LABEL: @test_vsrad_n_s64(
// CHECK:   [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrad_n_u64(
// CHECK:   [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// With a full-width (64-bit) unsigned shift the shifted addend is 0, so the
// accumulate folds away and the function just returns %a.
// CHECK-LABEL: @test_vsrad_n_u64_2(
// CHECK:   ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: @test_vsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}
14394 
// Rounding shift right and accumulate: the rounding shift of %b is the
// srshl/urshl intrinsic with a negated shift amount (see the vrshr tests
// above), and the result is then added to %a.

// CHECK-LABEL: @test_vrsrad_n_s64(
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsrad_n_u64(
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}
14434 
// Shift left by immediate: both signed and unsigned forms lower to a plain
// "shl" (left shift has no signedness distinction).

// CHECK-LABEL: @test_vshld_n_s64(
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK:   ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshld_n_u64(
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK:   ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}
14466 
// Scalar signed saturating shift left by immediate.  There is no 8/16-bit
// scalar sqshl intrinsic, so the byte/halfword forms insert the scalar into
// lane 0 of a vector, call the vector sqshl, and extract lane 0; the 32/64-bit
// forms use the scalar i32/i64 intrinsic directly.

// CHECK-LABEL: @test_vqshlb_n_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_s32(
// CHECK:   [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_s64(
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}
14498 
// Vector signed saturating shift left by immediate.  A shift amount of 0 is
// used here, so the splat-of-immediate second operand appears as
// "zeroinitializer" in the IR.  Non-i8 element types round-trip the input
// through a byte-vector bitcast first.

// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}
14557 
// Vector unsigned saturating shift left by immediate — same pattern as the
// signed tests above, but lowering to the "uqshl" intrinsics.

// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}
14616 
// Remaining saturating shift-left tests: the 1x64-bit vector forms (shift
// by 1, so the splat constant is <i64 1>) and the unsigned scalar forms,
// which mirror the signed scalar tests above (8/16-bit via insert/extract
// around the vector uqshl, 32/64-bit via the scalar intrinsic).

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshlb_n_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_u32(
// CHECK:   [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_u64(
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}
14666 
// Signed-to-unsigned saturating shift left by immediate ("sqshlu").
// Same lowering shape as vqshl: 8/16-bit scalars go through lane 0 of a
// vector, 32/64-bit scalars use the scalar intrinsic, and the 1x64-bit
// vector form bitcasts through <8 x i8>.

// CHECK-LABEL: @test_vqshlub_n_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshluh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshlus_n_s32(
// CHECK:   [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshlud_n_s64(
// CHECK:   [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}
14707 
// Shift right/left and insert (SRI/SLI) by immediate.  Only <1 x i64>
// vector intrinsics exist, so the scalar "d" forms bitcast the i64 operands
// to <1 x i64>, call the vector vsri/vsli intrinsic, and bitcast back.
// Signed and unsigned variants lower identically (bit insertion has no
// signedness).

// CHECK-LABEL: @test_vsrid_n_s64(
// CHECK:   [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK:   [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK:   ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrid_n_u64(
// CHECK:   [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK:   [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK:   ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_s64(
// CHECK:   [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK:   [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK:   ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_u64(
// CHECK:   [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK:   [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK:   ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}
14791 
// Saturating shift right narrow by immediate.  The h/s forms (16->8 and
// 32->16 bit narrows) insert the scalar into lane 0 of a wide vector, call
// the vector sqshrn/uqshrn, and extract lane 0 of the narrow result; the
// d form (64->32) uses the scalar i32-returning intrinsic.

// CHECK-LABEL: @test_vqshrnh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_s64(
// CHECK:   [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqshrnh_n_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_u32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_u64(
// CHECK:   [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}
14841 
// Rounding saturating shift right narrow by immediate — same insert/extract
// lowering as the vqshrn tests above, but using sqrshrn/uqrshrn.

// CHECK-LABEL: @test_vqrshrnh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_s64(
// CHECK:   [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqrshrnh_n_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_u32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_u64(
// CHECK:   [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}
14891 
// Scalar signed saturating shift-right-unsigned-narrow by immediate (sqshrun).
// CHECK-LABEL: @test_vqshrunh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshruns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrund_n_s64(
// CHECK:   [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}
14916 
// Scalar signed saturating rounded shift-right-unsigned-narrow by immediate (sqrshrun).
// CHECK-LABEL: @test_vqrshrunh_n_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshruns_n_s32(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrund_n_s64(
// CHECK:   [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRUND_N_S64]]
int32_t test_vqrshrund_n_s64(int64_t a) {
  return (int32_t)vqrshrund_n_s64(a, 32);
}
14941 
// Scalar fixed-point to floating-point conversions (vcvtfxs2fp / vcvtfxu2fp).
// CHECK-LABEL: @test_vcvts_n_f32_s32(
// CHECK:   [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK:   ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_f64_s64(
// CHECK:   [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK:   ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_f32_u32(
// CHECK:   [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK:   ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_f64_u64(
// CHECK:   [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK:   ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}
14968 }
14969 
// Scalar floating-point to fixed-point conversions (vcvtfp2fxs / vcvtfp2fxu).
// CHECK-LABEL: @test_vcvts_n_s32_f32(
// CHECK:   [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK:   ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_s64_f64(
// CHECK:   [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK:   ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_u32_f32(
// CHECK:   [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK:   ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_u64_f64(
// CHECK:   [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK:   ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
14997 
// vreinterpret_s8_* family: 64-bit vector reinterprets to int8x8_t.
// Same-layout sources (u8, p8) lower to no-ops; all others to a bitcast.
// CHECK-LABEL: @test_vreinterpret_s8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u8(
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p8(
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}
15086 
// vreinterpret_s16_* family: 64-bit vector reinterprets to int16x4_t.
// CHECK-LABEL: @test_vreinterpret_s16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}
15175 
// vreinterpret_s32_* family: 64-bit vector reinterprets to int32x2_t.
// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK:   ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}
15265 
// vreinterpret_s64_* family: 64-bit vector reinterprets to int64x1_t.
// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u64(
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p64(
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}
15354 
// vreinterpret_u8_* family: 64-bit vector reinterprets to uint8x8_t.
// CHECK-LABEL: @test_vreinterpret_u8_s8(
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p8(
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}
15443 
// vreinterpret_u16_* family: 64-bit vector reinterprets to uint16x4_t.
// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p16(
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}
15532 
// vreinterpret_u32_* family: 64-bit vector reinterprets to uint32x2_t.
// CHECK-LABEL: @test_vreinterpret_u32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s32(
// CHECK:   ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}
15622 
// vreinterpret_u64_* family (first part): 64-bit vector reinterprets to uint64x1_t.
// CHECK-LABEL: @test_vreinterpret_u64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s64(
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}
15670 
15671 // CHECK-LABEL: @test_vreinterpret_u64_f16(
15672 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
15673 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f16(float16x4_t a)15674 uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
15675   return vreinterpret_u64_f16(a);
15676 }
15677 
15678 // CHECK-LABEL: @test_vreinterpret_u64_f32(
15679 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
15680 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f32(float32x2_t a)15681 uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
15682   return vreinterpret_u64_f32(a);
15683 }
15684 
15685 // CHECK-LABEL: @test_vreinterpret_u64_f64(
15686 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
15687 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f64(float64x1_t a)15688 uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
15689   return vreinterpret_u64_f64(a);
15690 }
15691 
15692 // CHECK-LABEL: @test_vreinterpret_u64_p8(
15693 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15694 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p8(poly8x8_t a)15695 uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
15696   return vreinterpret_u64_p8(a);
15697 }
15698 
15699 // CHECK-LABEL: @test_vreinterpret_u64_p16(
15700 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15701 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p16(poly16x4_t a)15702 uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
15703   return vreinterpret_u64_p16(a);
15704 }
15705 
15706 // CHECK-LABEL: @test_vreinterpret_u64_p64(
15707 // CHECK:   ret <1 x i64> %a
test_vreinterpret_u64_p64(poly64x1_t a)15708 uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
15709   return vreinterpret_u64_p64(a);
15710 }
15711 
// vreinterpret_* to float16x4_t: every 64-bit source type is bitcast to
// <4 x half>; no value conversion takes place, only a type relabelling.
// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}
15802 
// vreinterpret_* to float32x2_t: every 64-bit source type is bitcast to
// <2 x float>.
// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}
15893 
// vreinterpret_* to float64x1_t (AArch64-only destination): every 64-bit
// source type is bitcast to <1 x double>.
// CHECK-LABEL: @test_vreinterpret_f64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}
15984 
// vreinterpret_* to poly8x8_t: bitcast to <8 x i8>; s8 and u8 sources share
// that IR type already, so those two cases return %a with no instruction.
// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}
16073 
// vreinterpret_* to poly16x4_t: bitcast to <4 x i16>; s16 and u16 sources
// already have that IR type, so those cases return %a directly.
// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}
16162 
// vreinterpret_* to poly64x1_t: bitcast to <1 x i64>; s64 and u64 sources
// already have that IR type, so those cases return %a directly.
// CHECK-LABEL: @test_vreinterpret_p64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s64(
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u64(
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}
16251 
// vreinterpretq_* (128-bit q-register variants) to int8x16_t: bitcast to
// <16 x i8>; u8 and p8 sources share that IR type, so those cases return %a.
// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}
16340 
// vreinterpretq_* to int16x8_t: bitcast to <8 x i16>; the u16 source shares
// that IR type, so that case returns %a with no instruction.
// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}
16402 
16403 // CHECK-LABEL: @test_vreinterpretq_s16_f64(
16404 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
16405 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f64(float64x2_t a)16406 int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
16407   return vreinterpretq_s16_f64(a);
16408 }
16409 
16410 // CHECK-LABEL: @test_vreinterpretq_s16_p8(
16411 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16412 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_p8(poly8x16_t a)16413 int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
16414   return vreinterpretq_s16_p8(a);
16415 }
16416 
16417 // CHECK-LABEL: @test_vreinterpretq_s16_p16(
16418 // CHECK:   ret <8 x i16> %a
test_vreinterpretq_s16_p16(poly16x8_t a)16419 int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
16420   return vreinterpretq_s16_p16(a);
16421 }
16422 
16423 // CHECK-LABEL: @test_vreinterpretq_s16_p64(
16424 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16425 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_p64(poly64x2_t a)16426 int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
16427   return vreinterpretq_s16_p64(a);
16428 }
16429 
// int32x4_t reinterpret tests: each converts a 128-bit vector to <4 x i32>.
// uint32x4_t -> int32x4_t shares the IR type, so it expects a plain `ret %a`;
// all others expect exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK:   ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}
16519 
// int64x2_t reinterpret tests: each converts a 128-bit vector to <2 x i64>.
// uint64x2_t and poly64x2_t share the IR type with int64x2_t, so those cases
// expect a plain `ret %a`; all others expect exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p64(
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}
16608 
// uint8x16_t reinterpret tests: each converts a 128-bit vector to <16 x i8>.
// int8x16_t and poly8x16_t share the IR type with uint8x16_t, so those cases
// expect a plain `ret %a`; all others expect exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}
16697 
// uint16x8_t reinterpret tests: each converts a 128-bit vector to <8 x i16>.
// int16x8_t and poly16x8_t share the IR type with uint16x8_t, so those cases
// expect a plain `ret %a`; all others expect exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}
16786 
// uint32x4_t reinterpret tests: each converts a 128-bit vector to <4 x i32>.
// int32x4_t -> uint32x4_t shares the IR type, so it expects a plain `ret %a`;
// all others expect exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK:   ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}
16876 
// uint64x2_t reinterpret tests: each converts a 128-bit vector to <2 x i64>.
// int64x2_t and poly64x2_t share the IR type with uint64x2_t, so those cases
// expect a plain `ret %a`; all others expect exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p64(
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}
16965 
// float16x8_t reinterpret tests: each converts a 128-bit vector to <8 x half>.
// No source type shares the IR representation of <8 x half>, so every case
// expects exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}
17056 
// float32x4_t reinterpret tests: each converts a 128-bit vector to
// <4 x float>. No source type shares the IR representation of <4 x float>,
// so every case expects exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}
17119 
17120 // CHECK-LABEL: @test_vreinterpretq_f32_f64(
17121 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
17122 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_f64(float64x2_t a)17123 float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
17124   return vreinterpretq_f32_f64(a);
17125 }
17126 
17127 // CHECK-LABEL: @test_vreinterpretq_f32_p8(
17128 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
17129 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p8(poly8x16_t a)17130 float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
17131   return vreinterpretq_f32_p8(a);
17132 }
17133 
17134 // CHECK-LABEL: @test_vreinterpretq_f32_p16(
17135 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
17136 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p16(poly16x8_t a)17137 float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
17138   return vreinterpretq_f32_p16(a);
17139 }
17140 
17141 // CHECK-LABEL: @test_vreinterpretq_f32_p64(
17142 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
17143 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p64(poly64x2_t a)17144 float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
17145   return vreinterpretq_f32_p64(a);
17146 }
17147 
// vreinterpretq_f64_* reinterpret every other 128-bit NEON vector type as
// float64x2_t (AArch64-only type). Codegen is a single bitcast to
// <2 x double>.
// CHECK-LABEL: @test_vreinterpretq_f64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}
17238 
// vreinterpretq_p8_* reinterpret other 128-bit NEON vector types as
// poly8x16_t. When source and destination share the same LLVM type
// (<16 x i8>, e.g. from s8/u8) no bitcast is emitted and the argument is
// returned directly; otherwise a single bitcast is expected.
// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}
17327 
// vreinterpretq_p16_* reinterpret other 128-bit NEON vector types as
// poly16x8_t. Same-LLVM-type sources (s16/u16, both <8 x i16>) are
// returned directly; every other source needs one bitcast.
// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}
17416 
// vreinterpretq_p64_* reinterpret other 128-bit NEON vector types as
// poly64x2_t. Same-LLVM-type sources (s64/u64, both <2 x i64>) are
// returned directly; every other source needs one bitcast.
// CHECK-LABEL: @test_vreinterpretq_p64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s64(
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u64(
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}
17505 
// Scalar floating-point absolute difference (FABD): these should lower to
// the scalar-SIMD llvm.aarch64.sisd.fabd intrinsic, not a vector form.
// CHECK-LABEL: @test_vabds_f32(
// CHECK:   [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK:   ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK:   [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK:   ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}
17519 
// vuqadd[q]: signed saturating accumulate of an unsigned value
// (llvm.aarch64.neon.suqadd). The CHECK styles vary per function because
// some were regenerated with stricter CHECK-NEXT matching than others;
// keep each block's style as-is when updating.
// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK: entry:
// CHECK-NEXT:  [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT:  ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT:  ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT:  ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT:  ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT:  ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// The 64x1 variant goes through generic builtin lowering, which emits
// (dead) <8 x i8> bitcasts of both operands before the intrinsic call.
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT:  ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}
17579 
// vsqadd[q]: unsigned saturating accumulate of a signed value
// (llvm.aarch64.neon.usqadd). For element widths > 8 bits the generic
// builtin path first emits (dead) <N x i8> bitcasts of both operands,
// which the CHECK lines deliberately match.
// CHECK-LABEL: @test_vsqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: @test_vsqadd_u8(
// CHECK:   [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK:   [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vsqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}
17647 
// Unary 64x1 operations: absolute value, saturating absolute value,
// saturating negate, and plain negate. vneg lowers to a plain IR sub from
// zero; the others call target intrinsics (with a dead <8 x i8> bitcast
// from the generic builtin path).
// CHECK-LABEL: @test_vabs_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
// CHECK:   ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: @test_vqabs_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQABS_V1_I]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: @test_vqneg_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQNEG_V1_I]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: @test_vneg_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}
17680 
// Floating-point across-vector reductions (add / max / min / maxnm /
// minnm). Each collapses a vector to a scalar via the corresponding
// llvm.aarch64.neon.f*v intrinsic.
// CHECK-LABEL: @test_vaddv_f32(
// CHECK:   [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f32(
// CHECK:   [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
// CHECK:   ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f64(
// CHECK:   [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: @test_vmaxv_f32(
// CHECK:   [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: @test_vmaxvq_f64(
// CHECK:   [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: @test_vminv_f32(
// CHECK:   [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: @test_vminvq_f64(
// CHECK:   [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmvq_f64(
// CHECK:   [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmv_f32(
// CHECK:   [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: @test_vminnmvq_f64(
// CHECK:   [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: @test_vminnmv_f32(
// CHECK:   [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}
17757 
// 64-bit pairwise and across-vector integer additions. vpaddq uses the
// pairwise addp intrinsic (the trailing <16 x i8> bitcast is dead, from
// the generic builtin path); vpaddd/vaddvq reduce to a scalar i64 via
// uaddv/saddv.
// CHECK-LABEL: @test_vpaddq_s64(
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: @test_vpaddq_u64(
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: @test_vpaddd_u64(
// CHECK:   [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: @test_vaddvq_s64(
// CHECK:   [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: @test_vaddvq_u64(
// CHECK:   [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}
17794 
17795 // CHECK-LABEL: @test_vadd_f64(
17796 // CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, %b
17797 // CHECK:   ret <1 x double> [[ADD_I]]
test_vadd_f64(float64x1_t a,float64x1_t b)17798 float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
17799   return vadd_f64(a, b);
17800 }
17801 
17802 // CHECK-LABEL: @test_vmul_f64(
17803 // CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %a, %b
17804 // CHECK:   ret <1 x double> [[MUL_I]]
test_vmul_f64(float64x1_t a,float64x1_t b)17805 float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
17806   return vmul_f64(a, b);
17807 }
17808 
17809 // CHECK-LABEL: @test_vdiv_f64(
17810 // CHECK:   [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
17811 // CHECK:   ret <1 x double> [[DIV_I]]
test_vdiv_f64(float64x1_t a,float64x1_t b)17812 float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
17813   return vdiv_f64(a, b);
17814 }
17815 
17816 // CHECK-LABEL: @test_vmla_f64(
17817 // CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17818 // CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
17819 // CHECK:   ret <1 x double> [[ADD_I]]
test_vmla_f64(float64x1_t a,float64x1_t b,float64x1_t c)17820 float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17821   return vmla_f64(a, b, c);
17822 }
17823 
17824 // CHECK-LABEL: @test_vmls_f64(
17825 // CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17826 // CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
17827 // CHECK:   ret <1 x double> [[SUB_I]]
test_vmls_f64(float64x1_t a,float64x1_t b,float64x1_t c)17828 float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17829   return vmls_f64(a, b, c);
17830 }
17831 
17832 // CHECK-LABEL: @test_vfma_f64(
17833 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17834 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17835 // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17836 // CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
17837 // CHECK:   ret <1 x double> [[TMP3]]
test_vfma_f64(float64x1_t a,float64x1_t b,float64x1_t c)17838 float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17839   return vfma_f64(a, b, c);
17840 }
17841 
17842 // CHECK-LABEL: @test_vfms_f64(
17843 // CHECK:   [[SUB_I:%.*]] = fneg <1 x double> %b
17844 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17845 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
17846 // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17847 // CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
17848 // CHECK:   ret <1 x double> [[TMP3]]
test_vfms_f64(float64x1_t a,float64x1_t b,float64x1_t c)17849 float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17850   return vfms_f64(a, b, c);
17851 }
17852 
17853 // CHECK-LABEL: @test_vsub_f64(
17854 // CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, %b
17855 // CHECK:   ret <1 x double> [[SUB_I]]
test_vsub_f64(float64x1_t a,float64x1_t b)17856 float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
17857   return vsub_f64(a, b);
17858 }
17859 
17860 // CHECK-LABEL: @test_vabd_f64(
17861 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17862 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17863 // CHECK:   [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
17864 // CHECK:   ret <1 x double> [[VABD2_I]]
test_vabd_f64(float64x1_t a,float64x1_t b)17865 float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
17866   return vabd_f64(a, b);
17867 }
17868 
17869 // CHECK-LABEL: @test_vmax_f64(
17870 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17871 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17872 // CHECK:   [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
17873 // CHECK:   ret <1 x double> [[VMAX2_I]]
test_vmax_f64(float64x1_t a,float64x1_t b)17874 float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
17875   return vmax_f64(a, b);
17876 }
17877 
17878 // CHECK-LABEL: @test_vmin_f64(
17879 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17880 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17881 // CHECK:   [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
17882 // CHECK:   ret <1 x double> [[VMIN2_I]]
test_vmin_f64(float64x1_t a,float64x1_t b)17883 float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
17884   return vmin_f64(a, b);
17885 }
17886 
17887 // CHECK-LABEL: @test_vmaxnm_f64(
17888 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17889 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17890 // CHECK:   [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
17891 // CHECK:   ret <1 x double> [[VMAXNM2_I]]
test_vmaxnm_f64(float64x1_t a,float64x1_t b)17892 float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
17893   return vmaxnm_f64(a, b);
17894 }
17895 
17896 // CHECK-LABEL: @test_vminnm_f64(
17897 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17898 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17899 // CHECK:   [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
17900 // CHECK:   ret <1 x double> [[VMINNM2_I]]
test_vminnm_f64(float64x1_t a,float64x1_t b)17901 float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
17902   return vminnm_f64(a, b);
17903 }
17904 
17905 // CHECK-LABEL: @test_vabs_f64(
17906 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17907 // CHECK:   [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
17908 // CHECK:   ret <1 x double> [[VABS1_I]]
test_vabs_f64(float64x1_t a)17909 float64x1_t test_vabs_f64(float64x1_t a) {
17910   return vabs_f64(a);
17911 }
17912 
17913 // CHECK-LABEL: @test_vneg_f64(
17914 // CHECK:   [[SUB_I:%.*]] = fneg <1 x double> %a
17915 // CHECK:   ret <1 x double> [[SUB_I]]
test_vneg_f64(float64x1_t a)17916 float64x1_t test_vneg_f64(float64x1_t a) {
17917   return vneg_f64(a);
17918 }
17919 
17920 // CHECK-LABEL: @test_vcvt_s64_f64(
17921 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17922 // CHECK:   [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
17923 // CHECK:   ret <1 x i64> [[TMP1]]
test_vcvt_s64_f64(float64x1_t a)17924 int64x1_t test_vcvt_s64_f64(float64x1_t a) {
17925   return vcvt_s64_f64(a);
17926 }
17927 
17928 // CHECK-LABEL: @test_vcvt_u64_f64(
17929 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17930 // CHECK:   [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
17931 // CHECK:   ret <1 x i64> [[TMP1]]
test_vcvt_u64_f64(float64x1_t a)17932 uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
17933   return vcvt_u64_f64(a);
17934 }
17935 
17936 // CHECK-LABEL: @test_vcvtn_s64_f64(
17937 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17938 // CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
17939 // CHECK:   ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_s64_f64(float64x1_t a)17940 int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
17941   return vcvtn_s64_f64(a);
17942 }
17943 
17944 // CHECK-LABEL: @test_vcvtn_u64_f64(
17945 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17946 // CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
17947 // CHECK:   ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_u64_f64(float64x1_t a)17948 uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
17949   return vcvtn_u64_f64(a);
17950 }
17951 
17952 // CHECK-LABEL: @test_vcvtp_s64_f64(
17953 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17954 // CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
17955 // CHECK:   ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_s64_f64(float64x1_t a)17956 int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
17957   return vcvtp_s64_f64(a);
17958 }
17959 
17960 // CHECK-LABEL: @test_vcvtp_u64_f64(
17961 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17962 // CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
17963 // CHECK:   ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_u64_f64(float64x1_t a)17964 uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
17965   return vcvtp_u64_f64(a);
17966 }
17967 
17968 // CHECK-LABEL: @test_vcvtm_s64_f64(
17969 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17970 // CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
17971 // CHECK:   ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_s64_f64(float64x1_t a)17972 int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
17973   return vcvtm_s64_f64(a);
17974 }
17975 
17976 // CHECK-LABEL: @test_vcvtm_u64_f64(
17977 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17978 // CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
17979 // CHECK:   ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_u64_f64(float64x1_t a)17980 uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
17981   return vcvtm_u64_f64(a);
17982 }
17983 
17984 // CHECK-LABEL: @test_vcvta_s64_f64(
17985 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17986 // CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
17987 // CHECK:   ret <1 x i64> [[VCVTA1_I]]
test_vcvta_s64_f64(float64x1_t a)17988 int64x1_t test_vcvta_s64_f64(float64x1_t a) {
17989   return vcvta_s64_f64(a);
17990 }
17991 
17992 // CHECK-LABEL: @test_vcvta_u64_f64(
17993 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17994 // CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
17995 // CHECK:   ret <1 x i64> [[VCVTA1_I]]
test_vcvta_u64_f64(float64x1_t a)17996 uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
17997   return vcvta_u64_f64(a);
17998 }
17999 
18000 // CHECK-LABEL: @test_vcvt_f64_s64(
18001 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18002 // CHECK:   [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
18003 // CHECK:   ret <1 x double> [[VCVT_I]]
test_vcvt_f64_s64(int64x1_t a)18004 float64x1_t test_vcvt_f64_s64(int64x1_t a) {
18005   return vcvt_f64_s64(a);
18006 }
18007 
18008 // CHECK-LABEL: @test_vcvt_f64_u64(
18009 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18010 // CHECK:   [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
18011 // CHECK:   ret <1 x double> [[VCVT_I]]
test_vcvt_f64_u64(uint64x1_t a)18012 float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
18013   return vcvt_f64_u64(a);
18014 }
18015 
18016 // CHECK-LABEL: @test_vcvt_n_s64_f64(
18017 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18018 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
18019 // CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
18020 // CHECK:   ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_s64_f64(float64x1_t a)18021 int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
18022   return vcvt_n_s64_f64(a, 64);
18023 }
18024 
18025 // CHECK-LABEL: @test_vcvt_n_u64_f64(
18026 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18027 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
18028 // CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
18029 // CHECK:   ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_u64_f64(float64x1_t a)18030 uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
18031   return vcvt_n_u64_f64(a, 64);
18032 }
18033 
18034 // CHECK-LABEL: @test_vcvt_n_f64_s64(
18035 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18036 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18037 // CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
18038 // CHECK:   ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_s64(int64x1_t a)18039 float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
18040   return vcvt_n_f64_s64(a, 64);
18041 }
18042 
18043 // CHECK-LABEL: @test_vcvt_n_f64_u64(
18044 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18045 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18046 // CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
18047 // CHECK:   ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_u64(uint64x1_t a)18048 float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
18049   return vcvt_n_f64_u64(a, 64);
18050 }
18051 
18052 // CHECK-LABEL: @test_vrndn_f64(
18053 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18054 // CHECK:   [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
18055 // CHECK:   ret <1 x double> [[VRNDN1_I]]
test_vrndn_f64(float64x1_t a)18056 float64x1_t test_vrndn_f64(float64x1_t a) {
18057   return vrndn_f64(a);
18058 }
18059 
18060 // CHECK-LABEL: @test_vrnda_f64(
18061 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18062 // CHECK:   [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
18063 // CHECK:   ret <1 x double> [[VRNDA1_I]]
test_vrnda_f64(float64x1_t a)18064 float64x1_t test_vrnda_f64(float64x1_t a) {
18065   return vrnda_f64(a);
18066 }
18067 
18068 // CHECK-LABEL: @test_vrndp_f64(
18069 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18070 // CHECK:   [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
18071 // CHECK:   ret <1 x double> [[VRNDP1_I]]
test_vrndp_f64(float64x1_t a)18072 float64x1_t test_vrndp_f64(float64x1_t a) {
18073   return vrndp_f64(a);
18074 }
18075 
18076 // CHECK-LABEL: @test_vrndm_f64(
18077 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18078 // CHECK:   [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
18079 // CHECK:   ret <1 x double> [[VRNDM1_I]]
test_vrndm_f64(float64x1_t a)18080 float64x1_t test_vrndm_f64(float64x1_t a) {
18081   return vrndm_f64(a);
18082 }
18083 
18084 // CHECK-LABEL: @test_vrndx_f64(
18085 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18086 // CHECK:   [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
18087 // CHECK:   ret <1 x double> [[VRNDX1_I]]
test_vrndx_f64(float64x1_t a)18088 float64x1_t test_vrndx_f64(float64x1_t a) {
18089   return vrndx_f64(a);
18090 }
18091 
18092 // CHECK-LABEL: @test_vrnd_f64(
18093 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18094 // CHECK:   [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
18095 // CHECK:   ret <1 x double> [[VRNDZ1_I]]
test_vrnd_f64(float64x1_t a)18096 float64x1_t test_vrnd_f64(float64x1_t a) {
18097   return vrnd_f64(a);
18098 }
18099 
18100 // CHECK-LABEL: @test_vrndi_f64(
18101 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18102 // CHECK:   [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
18103 // CHECK:   ret <1 x double> [[VRNDI1_I]]
test_vrndi_f64(float64x1_t a)18104 float64x1_t test_vrndi_f64(float64x1_t a) {
18105   return vrndi_f64(a);
18106 }
18107 
18108 // CHECK-LABEL: @test_vrsqrte_f64(
18109 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18110 // CHECK:   [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
18111 // CHECK:   ret <1 x double> [[VRSQRTE_V1_I]]
test_vrsqrte_f64(float64x1_t a)18112 float64x1_t test_vrsqrte_f64(float64x1_t a) {
18113   return vrsqrte_f64(a);
18114 }
18115 
18116 // CHECK-LABEL: @test_vrecpe_f64(
18117 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18118 // CHECK:   [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
18119 // CHECK:   ret <1 x double> [[VRECPE_V1_I]]
test_vrecpe_f64(float64x1_t a)18120 float64x1_t test_vrecpe_f64(float64x1_t a) {
18121   return vrecpe_f64(a);
18122 }
18123 
18124 // CHECK-LABEL: @test_vsqrt_f64(
18125 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18126 // CHECK:   [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
18127 // CHECK:   ret <1 x double> [[VSQRT_I]]
test_vsqrt_f64(float64x1_t a)18128 float64x1_t test_vsqrt_f64(float64x1_t a) {
18129   return vsqrt_f64(a);
18130 }
18131 
18132 // CHECK-LABEL: @test_vrecps_f64(
18133 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18134 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
18135 // CHECK:   [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
18136 // CHECK:   ret <1 x double> [[VRECPS_V2_I]]
test_vrecps_f64(float64x1_t a,float64x1_t b)18137 float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
18138   return vrecps_f64(a, b);
18139 }
18140 
18141 // CHECK-LABEL: @test_vrsqrts_f64(
18142 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18143 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
18144 // CHECK:   [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
18145 // CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
18146 // CHECK:   ret <1 x double> [[VRSQRTS_V2_I]]
test_vrsqrts_f64(float64x1_t a,float64x1_t b)18147 float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
18148   return vrsqrts_f64(a, b);
18149 }
18150 
18151 // CHECK-LABEL: @test_vminv_s32(
18152 // CHECK:   [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
18153 // CHECK:   ret i32 [[VMINV_S32_I]]
test_vminv_s32(int32x2_t a)18154 int32_t test_vminv_s32(int32x2_t a) {
18155   return vminv_s32(a);
18156 }
18157 
18158 // CHECK-LABEL: @test_vminv_u32(
18159 // CHECK:   [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
18160 // CHECK:   ret i32 [[VMINV_U32_I]]
test_vminv_u32(uint32x2_t a)18161 uint32_t test_vminv_u32(uint32x2_t a) {
18162   return vminv_u32(a);
18163 }
18164 
18165 // CHECK-LABEL: @test_vmaxv_s32(
18166 // CHECK:   [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
18167 // CHECK:   ret i32 [[VMAXV_S32_I]]
test_vmaxv_s32(int32x2_t a)18168 int32_t test_vmaxv_s32(int32x2_t a) {
18169   return vmaxv_s32(a);
18170 }
18171 
18172 // CHECK-LABEL: @test_vmaxv_u32(
18173 // CHECK:   [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
18174 // CHECK:   ret i32 [[VMAXV_U32_I]]
test_vmaxv_u32(uint32x2_t a)18175 uint32_t test_vmaxv_u32(uint32x2_t a) {
18176   return vmaxv_u32(a);
18177 }
18178 
18179 // CHECK-LABEL: @test_vaddv_s32(
18180 // CHECK:   [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
18181 // CHECK:   ret i32 [[VADDV_S32_I]]
test_vaddv_s32(int32x2_t a)18182 int32_t test_vaddv_s32(int32x2_t a) {
18183   return vaddv_s32(a);
18184 }
18185 
18186 // CHECK-LABEL: @test_vaddv_u32(
18187 // CHECK:   [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
18188 // CHECK:   ret i32 [[VADDV_U32_I]]
test_vaddv_u32(uint32x2_t a)18189 uint32_t test_vaddv_u32(uint32x2_t a) {
18190   return vaddv_u32(a);
18191 }
18192 
18193 // CHECK-LABEL: @test_vaddlv_s32(
18194 // CHECK:   [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
18195 // CHECK:   ret i64 [[VADDLV_S32_I]]
test_vaddlv_s32(int32x2_t a)18196 int64_t test_vaddlv_s32(int32x2_t a) {
18197   return vaddlv_s32(a);
18198 }
18199 
18200 // CHECK-LABEL: @test_vaddlv_u32(
18201 // CHECK:   [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
18202 // CHECK:   ret i64 [[VADDLV_U32_I]]
test_vaddlv_u32(uint32x2_t a)18203 uint64_t test_vaddlv_u32(uint32x2_t a) {
18204   return vaddlv_u32(a);
18205 }
18206