// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:     -fallow-half-arguments-and-returns -S -disable-O0-optnone \
// RUN:     -flax-vector-conversions=none -emit-llvm -o - %s \
// RUN:     | opt -S -mem2reg \
// RUN:     | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

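// The basic arithmetic intrinsics (vadd, vsub, vmul and their q-register
// variants) lower to plain IR add/sub/mul, or fadd/fsub/fmul for the
// floating-point element types.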
// CHECK-LABEL: @test_vadd_s8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f64(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}

// CHECK-LABEL: @test_vsub_s8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vsub_s16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vsub_s32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vsub_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: @test_vsub_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vsub_u16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vsub_u32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}

// CHECK-LABEL: @test_vmul_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: @test_vmul_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: @test_vmul_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: @test_vmul_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}

// CHECK-LABEL: @test_vmul_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: @test_vmul_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: @test_vmul_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

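// Polynomial multiply has no plain IR multiply; it lowers to the
// aarch64.neon.pmul intrinsic instead.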
// CHECK-LABEL: @test_vmul_p8(
// CHECK:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  return vmul_p8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_p8(
// CHECK:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vmulq_p8(v1, v2);
}

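// vmla/vmls are not single intrinsics at the IR level: they expand to a
// (f)mul of the last two operands followed by a (f)add or (f)sub with the
// accumulator.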
// CHECK-LABEL: @test_vmla_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}

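// vfma/vfms map onto llvm.fma; for vfms the second operand is negated first
// (fneg), so the call computes v1 - v2 * v3.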
// CHECK-LABEL: @test_vfma_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfma_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
// CHECK:   ret <2 x double> [[TMP3]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <2 x float> %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <4 x float> %v2
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f64(
// CHECK:   [[SUB_I:%.*]] = fneg <2 x double> %v2
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
// CHECK:   ret <2 x double> [[TMP3]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}

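// Unlike 32-bit NEON, AArch64 has a vector FP divide, so vdiv lowers to a
// plain IR fdiv.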
// CHECK-LABEL: @test_vdivq_f64(
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}

// CHECK-LABEL: @test_vdivq_f32(
// CHECK:   [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}

// CHECK-LABEL: @test_vdiv_f32(
// CHECK:   [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}

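// vaba (absolute difference and accumulate) expands to the sabd/uabd
// intrinsic followed by a plain IR add of the accumulator.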
// CHECK-LABEL: @test_vaba_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}

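// vabd maps directly onto the sabd/uabd intrinsics; the floating-point
// variants use fabd.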
// CHECK-LABEL: @test_vabd_s8(
// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK:   ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s8(
// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK:   ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK:   ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}

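// Bitwise select has no dedicated IR operation: vbsl expands to
// (v1 & v2) | (~v1 & v3), which the and/xor/and/or sequences below verify.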
// CHECK-LABEL: @test_vbsl_s8(
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vbsl_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <1 x i64> [[VBSL5_I]]
int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]]
// CHECK:   [[TMP4:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
// CHECK:   ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s8(
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK:   ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK:   ret <4 x i32> [[VBSL5_I]]
1196 uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
1197   return vbslq_u32(v1, v2, v3);
1198 }
1199 
1200 // CHECK-LABEL: @test_vbslq_u64(
1201 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1202 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1203 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
1204 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
1205 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1206 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
1207 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1208 // CHECK:   ret <2 x i64> [[VBSL5_I]]
1209 uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
1210   return vbslq_u64(v1, v2, v3);
1211 }
1212 
1213 // CHECK-LABEL: @test_vbslq_f32(
1214 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1215 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1216 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
1217 // CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1218 // CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1219 // CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
1220 // CHECK:   [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
1221 // CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
1222 // CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1223 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
1224 // CHECK:   ret <4 x float> [[TMP4]]
1225 float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
1226   return vbslq_f32(v1, v2, v3);
1227 }
1228 
1229 // CHECK-LABEL: @test_vbslq_p8(
1230 // CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1231 // CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1232 // CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1233 // CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1234 // CHECK:   ret <16 x i8> [[VBSL2_I]]
1235 poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
1236   return vbslq_p8(v1, v2, v3);
1237 }
1238 
1239 // CHECK-LABEL: @test_vbslq_p16(
1240 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1241 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1242 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1243 // CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
1244 // CHECK:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1245 // CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
1246 // CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1247 // CHECK:   ret <8 x i16> [[VBSL5_I]]
1248 poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
1249   return vbslq_p16(v1, v2, v3);
1250 }
1251 
1252 // CHECK-LABEL: @test_vbslq_f64(
1253 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1254 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1255 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
1256 // CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1257 // CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
1258 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
1259 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
1260 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
1261 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1262 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
1263 // CHECK:   ret <2 x double> [[TMP4]]
1264 float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
1265   return vbslq_f64(v1, v2, v3);
1266 }
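
// All of the vbslq_* tests above lower to the same bitwise-select pattern:
// (mask & a) | (~mask & b). As a minimal illustrative sketch (not one of the
// CHECKed tests), the hypothetical helper below spells out the per-lane
// semantics that the IR's and/xor/and/or sequence implements.
static inline uint32_t bsl_lane(uint32_t mask, uint32_t a, uint32_t b) {
  // Bits set in mask select from a; clear bits select from b.
  return (mask & a) | (~mask & b);
}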
1267 
1268 // CHECK-LABEL: @test_vrecps_f32(
1269 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1270 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1271 // CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
1272 // CHECK:   ret <2 x float> [[VRECPS_V2_I]]
1273 float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
1274   return vrecps_f32(v1, v2);
1275 }
1276 
1277 // CHECK-LABEL: @test_vrecpsq_f32(
1278 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1279 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1280 // CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
1281 // CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
1282 // CHECK:   ret <4 x float> [[VRECPSQ_V2_I]]
1283 float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
1284   return vrecpsq_f32(v1, v2);
1285 }
1286 
1287 // CHECK-LABEL: @test_vrecpsq_f64(
1288 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1289 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1290 // CHECK:   [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
1291 // CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
1292 // CHECK:   ret <2 x double> [[VRECPSQ_V2_I]]
1293 float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
1294   return vrecpsq_f64(v1, v2);
1295 }
1296 
1297 // CHECK-LABEL: @test_vrsqrts_f32(
1298 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1299 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1300 // CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
1301 // CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
1302 // CHECK:   ret <2 x float> [[VRSQRTS_V2_I]]
1303 float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
1304   return vrsqrts_f32(v1, v2);
1305 }
1306 
1307 // CHECK-LABEL: @test_vrsqrtsq_f32(
1308 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1309 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1310 // CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
1311 // CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
1312 // CHECK:   ret <4 x float> [[VRSQRTSQ_V2_I]]
1313 float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
1314   return vrsqrtsq_f32(v1, v2);
1315 }
1316 
1317 // CHECK-LABEL: @test_vrsqrtsq_f64(
1318 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1319 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1320 // CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
1321 // CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
1322 // CHECK:   ret <2 x double> [[VRSQRTSQ_V2_I]]
1323 float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
1324   return vrsqrtsq_f64(v1, v2);
1325 }
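
// frecps computes 2 - a*b and frsqrts computes (3 - a*b)/2; both exist to be
// the Newton-Raphson refinement step for the corresponding estimate
// instructions. A minimal sketch (illustrative only, not a CHECKed test) of
// a refined reciprocal built from the vrecpe_f32/vrecps_f32 intrinsics:
static inline float32x2_t recip_refined(float32x2_t d) {
  float32x2_t r = vrecpe_f32(d);      // low-precision initial estimate
  r = vmul_f32(r, vrecps_f32(d, r));  // r *= (2 - d*r): one Newton step
  r = vmul_f32(r, vrecps_f32(d, r));  // second step for more precision
  return r;
}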
1326 
1327 // CHECK-LABEL: @test_vcage_f32(
1328 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1329 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1330 // CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1331 // CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
1332 uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
1333   return vcage_f32(v1, v2);
1334 }
1335 
1336 // CHECK-LABEL: @test_vcage_f64(
1337 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1338 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1339 // CHECK:   [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1340 // CHECK:   ret <1 x i64> [[VCAGE_V2_I]]
1341 uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
1342   return vcage_f64(a, b);
1343 }
1344 
1345 // CHECK-LABEL: @test_vcageq_f32(
1346 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1347 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1348 // CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1349 // CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
1350 uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
1351   return vcageq_f32(v1, v2);
1352 }
1353 
1354 // CHECK-LABEL: @test_vcageq_f64(
1355 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1356 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1357 // CHECK:   [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1358 // CHECK:   ret <2 x i64> [[VCAGEQ_V2_I]]
1359 uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
1360   return vcageq_f64(v1, v2);
1361 }
1362 
1363 // CHECK-LABEL: @test_vcagt_f32(
1364 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1365 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1366 // CHECK:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1367 // CHECK:   ret <2 x i32> [[VCAGT_V2_I]]
1368 uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
1369   return vcagt_f32(v1, v2);
1370 }
1371 
1372 // CHECK-LABEL: @test_vcagt_f64(
1373 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1374 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1375 // CHECK:   [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1376 // CHECK:   ret <1 x i64> [[VCAGT_V2_I]]
1377 uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
1378   return vcagt_f64(a, b);
1379 }
1380 
1381 // CHECK-LABEL: @test_vcagtq_f32(
1382 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1383 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1384 // CHECK:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1385 // CHECK:   ret <4 x i32> [[VCAGTQ_V2_I]]
1386 uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
1387   return vcagtq_f32(v1, v2);
1388 }
1389 
1390 // CHECK-LABEL: @test_vcagtq_f64(
1391 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1392 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1393 // CHECK:   [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1394 // CHECK:   ret <2 x i64> [[VCAGTQ_V2_I]]
1395 uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
1396   return vcagtq_f64(v1, v2);
1397 }
1398 
1399 // CHECK-LABEL: @test_vcale_f32(
1400 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1401 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1402 // CHECK:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1403 // CHECK:   ret <2 x i32> [[VCALE_V2_I]]
1404 uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
1405   return vcale_f32(v1, v2);
1406   // Using registers other than v0, v1 is possible, but would be odd.
1407 }
1408 
1409 // CHECK-LABEL: @test_vcale_f64(
1410 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1411 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1412 // CHECK:   [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1413 // CHECK:   ret <1 x i64> [[VCALE_V2_I]]
1414 uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
1415   return vcale_f64(a, b);
1416 }
1417 
1418 // CHECK-LABEL: @test_vcaleq_f32(
1419 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1420 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1421 // CHECK:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1422 // CHECK:   ret <4 x i32> [[VCALEQ_V2_I]]
1423 uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
1424   return vcaleq_f32(v1, v2);
1425   // Using registers other than v0, v1 is possible, but would be odd.
1426 }
1427 
1428 // CHECK-LABEL: @test_vcaleq_f64(
1429 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1430 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1431 // CHECK:   [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1432 // CHECK:   ret <2 x i64> [[VCALEQ_V2_I]]
1433 uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
1434   return vcaleq_f64(v1, v2);
1435   // Using registers other than v0, v1 is possible, but would be odd.
1436 }
1437 
1438 // CHECK-LABEL: @test_vcalt_f32(
1439 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1440 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1441 // CHECK:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1442 // CHECK:   ret <2 x i32> [[VCALT_V2_I]]
1443 uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
1444   return vcalt_f32(v1, v2);
1445   // Using registers other than v0, v1 is possible, but would be odd.
1446 }
1447 
1448 // CHECK-LABEL: @test_vcalt_f64(
1449 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1450 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1451 // CHECK:   [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1452 // CHECK:   ret <1 x i64> [[VCALT_V2_I]]
1453 uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
1454   return vcalt_f64(a, b);
1455 }
1456 
1457 // CHECK-LABEL: @test_vcaltq_f32(
1458 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1459 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1460 // CHECK:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1461 // CHECK:   ret <4 x i32> [[VCALTQ_V2_I]]
1462 uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
1463   return vcaltq_f32(v1, v2);
1464   // Using registers other than v0, v1 is possible, but would be odd.
1465 }
1466 
1467 // CHECK-LABEL: @test_vcaltq_f64(
1468 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1469 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1470 // CHECK:   [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1471 // CHECK:   ret <2 x i64> [[VCALTQ_V2_I]]
1472 uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
1473   return vcaltq_f64(v1, v2);
1474   // Using registers other than v0, v1 is possible, but would be odd.
1475 }
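
// The vcale/vcalt CHECK lines above call the same facge/facgt intrinsics as
// vcage/vcagt, just with the operands swapped: |a| <= |b| is emitted as
// |b| >= |a|. An equivalent formulation through explicit absolute values
// (illustrative only, not a CHECKed test):
static inline uint32x2_t cale_via_abs(float32x2_t a, float32x2_t b) {
  return vcge_f32(vabs_f32(b), vabs_f32(a));  // |b| >= |a| <=> |a| <= |b|
}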
1476 
1477 // CHECK-LABEL: @test_vtst_s8(
1478 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1479 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1480 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1481 // CHECK:   ret <8 x i8> [[VTST_I]]
1482 uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
1483   return vtst_s8(v1, v2);
1484 }
1485 
1486 // CHECK-LABEL: @test_vtst_s16(
1487 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1488 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1489 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1490 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1491 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1492 // CHECK:   ret <4 x i16> [[VTST_I]]
1493 uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
1494   return vtst_s16(v1, v2);
1495 }
1496 
1497 // CHECK-LABEL: @test_vtst_s32(
1498 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1499 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1500 // CHECK:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1501 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1502 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1503 // CHECK:   ret <2 x i32> [[VTST_I]]
1504 uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
1505   return vtst_s32(v1, v2);
1506 }
1507 
1508 // CHECK-LABEL: @test_vtst_u8(
1509 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1510 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1511 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1512 // CHECK:   ret <8 x i8> [[VTST_I]]
1513 uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
1514   return vtst_u8(v1, v2);
1515 }
1516 
1517 // CHECK-LABEL: @test_vtst_u16(
1518 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1519 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1520 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1521 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1522 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1523 // CHECK:   ret <4 x i16> [[VTST_I]]
1524 uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
1525   return vtst_u16(v1, v2);
1526 }
1527 
1528 // CHECK-LABEL: @test_vtst_u32(
1529 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1530 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1531 // CHECK:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1532 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1533 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1534 // CHECK:   ret <2 x i32> [[VTST_I]]
1535 uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
1536   return vtst_u32(v1, v2);
1537 }
1538 
1539 // CHECK-LABEL: @test_vtstq_s8(
1540 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1541 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1542 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1543 // CHECK:   ret <16 x i8> [[VTST_I]]
1544 uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
1545   return vtstq_s8(v1, v2);
1546 }
1547 
1548 // CHECK-LABEL: @test_vtstq_s16(
1549 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1550 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1551 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1552 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1553 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1554 // CHECK:   ret <8 x i16> [[VTST_I]]
1555 uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
1556   return vtstq_s16(v1, v2);
1557 }
1558 
1559 // CHECK-LABEL: @test_vtstq_s32(
1560 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1561 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1562 // CHECK:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1563 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1564 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1565 // CHECK:   ret <4 x i32> [[VTST_I]]
1566 uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
1567   return vtstq_s32(v1, v2);
1568 }
1569 
1570 // CHECK-LABEL: @test_vtstq_u8(
1571 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1572 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1573 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1574 // CHECK:   ret <16 x i8> [[VTST_I]]
1575 uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
1576   return vtstq_u8(v1, v2);
1577 }
1578 
1579 // CHECK-LABEL: @test_vtstq_u16(
1580 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1581 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1582 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1583 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1584 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1585 // CHECK:   ret <8 x i16> [[VTST_I]]
1586 uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
1587   return vtstq_u16(v1, v2);
1588 }
1589 
1590 // CHECK-LABEL: @test_vtstq_u32(
1591 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1592 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1593 // CHECK:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1594 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1595 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1596 // CHECK:   ret <4 x i32> [[VTST_I]]
1597 uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
1598   return vtstq_u32(v1, v2);
1599 }
1600 
1601 // CHECK-LABEL: @test_vtstq_s64(
1602 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1603 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1604 // CHECK:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1605 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1606 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1607 // CHECK:   ret <2 x i64> [[VTST_I]]
1608 uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
1609   return vtstq_s64(v1, v2);
1610 }
1611 
1612 // CHECK-LABEL: @test_vtstq_u64(
1613 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1614 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1615 // CHECK:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1616 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1617 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1618 // CHECK:   ret <2 x i64> [[VTST_I]]
1619 uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
1620   return vtstq_u64(v1, v2);
1621 }
1622 
1623 // CHECK-LABEL: @test_vtst_p8(
1624 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1625 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1626 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1627 // CHECK:   ret <8 x i8> [[VTST_I]]
1628 uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
1629   return vtst_p8(v1, v2);
1630 }
1631 
1632 // CHECK-LABEL: @test_vtst_p16(
1633 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1634 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1635 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1636 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1637 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1638 // CHECK:   ret <4 x i16> [[VTST_I]]
1639 uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
1640   return vtst_p16(v1, v2);
1641 }
1642 
1643 // CHECK-LABEL: @test_vtstq_p8(
1644 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1645 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1646 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1647 // CHECK:   ret <16 x i8> [[VTST_I]]
1648 uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
1649   return vtstq_p8(v1, v2);
1650 }
1651 
1652 // CHECK-LABEL: @test_vtstq_p16(
1653 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1654 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1655 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1656 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1657 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1658 // CHECK:   ret <8 x i16> [[VTST_I]]
1659 uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
1660   return vtstq_p16(v1, v2);
1661 }
1662 
1663 // CHECK-LABEL: @test_vtst_s64(
1664 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1665 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1666 // CHECK:   [[TMP2:%.*]] = and <1 x i64> %a, %b
1667 // CHECK:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1668 // CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1669 // CHECK:   ret <1 x i64> [[VTST_I]]
1670 uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
1671   return vtst_s64(a, b);
1672 }
1673 
1674 // CHECK-LABEL: @test_vtst_u64(
1675 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1676 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1677 // CHECK:   [[TMP2:%.*]] = and <1 x i64> %a, %b
1678 // CHECK:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1679 // CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1680 // CHECK:   ret <1 x i64> [[VTST_I]]
1681 uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
1682   return vtst_u64(a, b);
1683 }
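
// Every vtst variant above expands to the same three steps: and the inputs,
// compare against zero, and sign-extend the i1 lanes into all-ones/all-zero
// masks. A scalar sketch of a single 8-bit lane (illustrative only):
static inline uint8_t tst_lane(uint8_t a, uint8_t b) {
  return (a & b) != 0 ? 0xFF : 0x00;  // all-ones when any common bit is set
}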
1684 
1685 // CHECK-LABEL: @test_vceq_s8(
1686 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1687 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1688 // CHECK:   ret <8 x i8> [[SEXT_I]]
1689 uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
1690   return vceq_s8(v1, v2);
1691 }
1692 
1693 // CHECK-LABEL: @test_vceq_s16(
1694 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1695 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1696 // CHECK:   ret <4 x i16> [[SEXT_I]]
1697 uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
1698   return vceq_s16(v1, v2);
1699 }
1700 
1701 // CHECK-LABEL: @test_vceq_s32(
1702 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1703 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1704 // CHECK:   ret <2 x i32> [[SEXT_I]]
1705 uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
1706   return vceq_s32(v1, v2);
1707 }
1708 
1709 // CHECK-LABEL: @test_vceq_s64(
1710 // CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1711 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1712 // CHECK:   ret <1 x i64> [[SEXT_I]]
1713 uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
1714   return vceq_s64(a, b);
1715 }
1716 
1717 // CHECK-LABEL: @test_vceq_u64(
1718 // CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1719 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1720 // CHECK:   ret <1 x i64> [[SEXT_I]]
1721 uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
1722   return vceq_u64(a, b);
1723 }
1724 
1725 // CHECK-LABEL: @test_vceq_f32(
1726 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
1727 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1728 // CHECK:   ret <2 x i32> [[SEXT_I]]
1729 uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
1730   return vceq_f32(v1, v2);
1731 }
1732 
1733 // CHECK-LABEL: @test_vceq_f64(
1734 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
1735 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1736 // CHECK:   ret <1 x i64> [[SEXT_I]]
1737 uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
1738   return vceq_f64(a, b);
1739 }
1740 
1741 // CHECK-LABEL: @test_vceq_u8(
1742 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1743 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1744 // CHECK:   ret <8 x i8> [[SEXT_I]]
1745 uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
1746   return vceq_u8(v1, v2);
1747 }
1748 
1749 // CHECK-LABEL: @test_vceq_u16(
1750 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1751 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1752 // CHECK:   ret <4 x i16> [[SEXT_I]]
1753 uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
1754   return vceq_u16(v1, v2);
1755 }
1756 
1757 // CHECK-LABEL: @test_vceq_u32(
1758 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1759 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1760 // CHECK:   ret <2 x i32> [[SEXT_I]]
1761 uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
1762   return vceq_u32(v1, v2);
1763 }
1764 
1765 // CHECK-LABEL: @test_vceq_p8(
1766 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1767 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1768 // CHECK:   ret <8 x i8> [[SEXT_I]]
1769 uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
1770   return vceq_p8(v1, v2);
1771 }
1772 
1773 // CHECK-LABEL: @test_vceqq_s8(
1774 // CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1775 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1776 // CHECK:   ret <16 x i8> [[SEXT_I]]
1777 uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
1778   return vceqq_s8(v1, v2);
1779 }
1780 
1781 // CHECK-LABEL: @test_vceqq_s16(
1782 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1783 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1784 // CHECK:   ret <8 x i16> [[SEXT_I]]
1785 uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
1786   return vceqq_s16(v1, v2);
1787 }
1788 
1789 // CHECK-LABEL: @test_vceqq_s32(
1790 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1791 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1792 // CHECK:   ret <4 x i32> [[SEXT_I]]
1793 uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
1794   return vceqq_s32(v1, v2);
1795 }
1796 
1797 // CHECK-LABEL: @test_vceqq_f32(
1798 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
1799 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1800 // CHECK:   ret <4 x i32> [[SEXT_I]]
1801 uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
1802   return vceqq_f32(v1, v2);
1803 }
1804 
1805 // CHECK-LABEL: @test_vceqq_u8(
1806 // CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1807 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1808 // CHECK:   ret <16 x i8> [[SEXT_I]]
1809 uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
1810   return vceqq_u8(v1, v2);
1811 }
1812 
1813 // CHECK-LABEL: @test_vceqq_u16(
1814 // CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1815 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1816 // CHECK:   ret <8 x i16> [[SEXT_I]]
1817 uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
1818   return vceqq_u16(v1, v2);
1819 }
1820 
1821 // CHECK-LABEL: @test_vceqq_u32(
1822 // CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1823 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1824 // CHECK:   ret <4 x i32> [[SEXT_I]]
1825 uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
1826   return vceqq_u32(v1, v2);
1827 }
1828 
1829 // CHECK-LABEL: @test_vceqq_p8(
1830 // CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1831 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1832 // CHECK:   ret <16 x i8> [[SEXT_I]]
1833 uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
1834   return vceqq_p8(v1, v2);
1835 }
1836 
1837 // CHECK-LABEL: @test_vceqq_s64(
1838 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1839 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1840 // CHECK:   ret <2 x i64> [[SEXT_I]]
1841 uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
1842   return vceqq_s64(v1, v2);
1843 }
1844 
1845 // CHECK-LABEL: @test_vceqq_u64(
1846 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1847 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1848 // CHECK:   ret <2 x i64> [[SEXT_I]]
1849 uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
1850   return vceqq_u64(v1, v2);
1851 }
1852 
1853 // CHECK-LABEL: @test_vceqq_f64(
1854 // CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
1855 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1856 // CHECK:   ret <2 x i64> [[SEXT_I]]
1857 uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
1858   return vceqq_f64(v1, v2);
1859 }
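
// Because the vceq results are full-lane masks (all-ones or all-zero), they
// can feed straight into vbsl. A minimal sketch of the usual
// compare-then-select idiom (illustrative only, not a CHECKed test):
static inline int32x2_t select_if_eq(int32x2_t a, int32x2_t b,
                                     int32x2_t x, int32x2_t y) {
  uint32x2_t m = vceq_s32(a, b);  // all-ones lanes where a == b
  return vbsl_s32(m, x, y);       // pick x in equal lanes, y elsewhere
}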
1860 
1861 // CHECK-LABEL: @test_vcge_s8(
1862 // CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
1863 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1864 // CHECK:   ret <8 x i8> [[SEXT_I]]
1865 uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
1866   return vcge_s8(v1, v2);
1867 }
1868 
1869 // CHECK-LABEL: @test_vcge_s16(
1870 // CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
1871 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1872 // CHECK:   ret <4 x i16> [[SEXT_I]]
1873 uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
1874   return vcge_s16(v1, v2);
1875 }
1876 
1877 // CHECK-LABEL: @test_vcge_s32(
1878 // CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
1879 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1880 // CHECK:   ret <2 x i32> [[SEXT_I]]
1881 uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
1882   return vcge_s32(v1, v2);
1883 }
1884 
1885 // CHECK-LABEL: @test_vcge_s64(
1886 // CHECK:   [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
1887 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1888 // CHECK:   ret <1 x i64> [[SEXT_I]]
1889 uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
1890   return vcge_s64(a, b);
1891 }
1892 
1893 // CHECK-LABEL: @test_vcge_u64(
1894 // CHECK:   [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
1895 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1896 // CHECK:   ret <1 x i64> [[SEXT_I]]
1897 uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
1898   return vcge_u64(a, b);
1899 }
1900 
1901 // CHECK-LABEL: @test_vcge_f32(
1902 // CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
1903 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1904 // CHECK:   ret <2 x i32> [[SEXT_I]]
1905 uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
1906   return vcge_f32(v1, v2);
1907 }
1908 
1909 // CHECK-LABEL: @test_vcge_f64(
1910 // CHECK:   [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
1911 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1912 // CHECK:   ret <1 x i64> [[SEXT_I]]
1913 uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
1914   return vcge_f64(a, b);
1915 }
1916 
1917 // CHECK-LABEL: @test_vcge_u8(
1918 // CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
1919 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1920 // CHECK:   ret <8 x i8> [[SEXT_I]]
1921 uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
1922   return vcge_u8(v1, v2);
1923 }
1924 
1925 // CHECK-LABEL: @test_vcge_u16(
1926 // CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
1927 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1928 // CHECK:   ret <4 x i16> [[SEXT_I]]
1929 uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
1930   return vcge_u16(v1, v2);
1931 }
1932 
1933 // CHECK-LABEL: @test_vcge_u32(
1934 // CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
1935 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1936 // CHECK:   ret <2 x i32> [[SEXT_I]]
1937 uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
1938   return vcge_u32(v1, v2);
1939 }
1940 
1941 // CHECK-LABEL: @test_vcgeq_s8(
1942 // CHECK:   [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
1943 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1944 // CHECK:   ret <16 x i8> [[SEXT_I]]
1945 uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
1946   return vcgeq_s8(v1, v2);
1947 }
1948 
1949 // CHECK-LABEL: @test_vcgeq_s16(
1950 // CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
1951 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1952 // CHECK:   ret <8 x i16> [[SEXT_I]]
1953 uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
1954   return vcgeq_s16(v1, v2);
1955 }
1956 
1957 // CHECK-LABEL: @test_vcgeq_s32(
1958 // CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
1959 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1960 // CHECK:   ret <4 x i32> [[SEXT_I]]
1961 uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
1962   return vcgeq_s32(v1, v2);
1963 }
1964 
1965 // CHECK-LABEL: @test_vcgeq_f32(
1966 // CHECK:   [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
1967 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1968 // CHECK:   ret <4 x i32> [[SEXT_I]]
1969 uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
1970   return vcgeq_f32(v1, v2);
1971 }
1972 
1973 // CHECK-LABEL: @test_vcgeq_u8(
1974 // CHECK:   [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
1975 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1976 // CHECK:   ret <16 x i8> [[SEXT_I]]
1977 uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
1978   return vcgeq_u8(v1, v2);
1979 }
1980 
1981 // CHECK-LABEL: @test_vcgeq_u16(
1982 // CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
1983 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1984 // CHECK:   ret <8 x i16> [[SEXT_I]]
1985 uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
1986   return vcgeq_u16(v1, v2);
1987 }
1988 
1989 // CHECK-LABEL: @test_vcgeq_u32(
1990 // CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
1991 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1992 // CHECK:   ret <4 x i32> [[SEXT_I]]
1993 uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
1994   return vcgeq_u32(v1, v2);
1995 }
1996 
1997 // CHECK-LABEL: @test_vcgeq_s64(
1998 // CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
1999 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2000 // CHECK:   ret <2 x i64> [[SEXT_I]]
2001 uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
2002   return vcgeq_s64(v1, v2);
2003 }
2004 
2005 // CHECK-LABEL: @test_vcgeq_u64(
2006 // CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
2007 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2008 // CHECK:   ret <2 x i64> [[SEXT_I]]
2009 uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
2010   return vcgeq_u64(v1, v2);
2011 }
2012 
2013 // CHECK-LABEL: @test_vcgeq_f64(
2014 // CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
2015 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2016 // CHECK:   ret <2 x i64> [[SEXT_I]]
2017 uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
2018   return vcgeq_f64(v1, v2);
2019 }
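
// Note that the element type of the inputs, not the unsigned result type,
// selects the predicate: signed inputs lower to icmp sge, unsigned inputs to
// icmp uge. A scalar sketch of an 8-bit lane where the two disagree
// (illustrative only):
static inline uint8_t ge_lane(uint8_t a, uint8_t b, int is_signed) {
  // 0xFF compares as -1 when signed but as 255 when unsigned.
  if (is_signed)
    return ((int8_t)a >= (int8_t)b) ? 0xFF : 0x00;
  return (a >= b) ? 0xFF : 0x00;
}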
2020 
2021 // CHECK-LABEL: @test_vcle_s8(
2022 // CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
2023 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2024 // CHECK:   ret <8 x i8> [[SEXT_I]]
2025 // Notes about vcle:
2026 // LE condition predicate implemented as GE, so check reversed operands.
2027 // Using registers other than v0, v1 is possible, but would be odd.
2028 uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
2029   return vcle_s8(v1, v2);
2030 }
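
// The identity behind the note above is simply a <= b <=> b >= a, which is
// also how it can be written with the GE intrinsic directly (illustrative
// only, not a CHECKed test):
static inline uint8x8_t cle_via_cge(int8x8_t a, int8x8_t b) {
  return vcge_s8(b, a);  // a <= b computed as b >= a
}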
2031 
2032 // CHECK-LABEL: @test_vcle_s16(
2033 // CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
2034 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2035 // CHECK:   ret <4 x i16> [[SEXT_I]]
2036 uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
2037   return vcle_s16(v1, v2);
2038 }
2039 
2040 // CHECK-LABEL: @test_vcle_s32(
2041 // CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
2042 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2043 // CHECK:   ret <2 x i32> [[SEXT_I]]
2044 uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
2045   return vcle_s32(v1, v2);
2046 }
2047 
2048 // CHECK-LABEL: @test_vcle_s64(
2049 // CHECK:   [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
2050 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2051 // CHECK:   ret <1 x i64> [[SEXT_I]]
2052 uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
2053   return vcle_s64(a, b);
2054 }
2055 
2056 // CHECK-LABEL: @test_vcle_u64(
2057 // CHECK:   [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
2058 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2059 // CHECK:   ret <1 x i64> [[SEXT_I]]
2060 uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
2061   return vcle_u64(a, b);
2062 }
2063 
2064 // CHECK-LABEL: @test_vcle_f32(
2065 // CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
2066 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2067 // CHECK:   ret <2 x i32> [[SEXT_I]]
2068 uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
2069   return vcle_f32(v1, v2);
2070 }
2071 
2072 // CHECK-LABEL: @test_vcle_f64(
2073 // CHECK:   [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
2074 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2075 // CHECK:   ret <1 x i64> [[SEXT_I]]
2076 uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
2077   return vcle_f64(a, b);
2078 }
2079 
2080 // CHECK-LABEL: @test_vcle_u8(
2081 // CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
2082 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2083 // CHECK:   ret <8 x i8> [[SEXT_I]]
2084 uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
2085   return vcle_u8(v1, v2);
2086 }
2087 
2088 // CHECK-LABEL: @test_vcle_u16(
2089 // CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
2090 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2091 // CHECK:   ret <4 x i16> [[SEXT_I]]
2092 uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
2093   return vcle_u16(v1, v2);
2094 }
2095 
2096 // CHECK-LABEL: @test_vcle_u32(
2097 // CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
2098 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2099 // CHECK:   ret <2 x i32> [[SEXT_I]]
2100 uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
2101   return vcle_u32(v1, v2);
2102 }
2103 
2104 // CHECK-LABEL: @test_vcleq_s8(
2105 // CHECK:   [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
2106 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2107 // CHECK:   ret <16 x i8> [[SEXT_I]]
2108 uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
2109   return vcleq_s8(v1, v2);
2110 }
2111 
2112 // CHECK-LABEL: @test_vcleq_s16(
2113 // CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
2114 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2115 // CHECK:   ret <8 x i16> [[SEXT_I]]
2116 uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
2117   return vcleq_s16(v1, v2);
2118 }
2119 
2120 // CHECK-LABEL: @test_vcleq_s32(
2121 // CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
2122 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2123 // CHECK:   ret <4 x i32> [[SEXT_I]]
2124 uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
2125   return vcleq_s32(v1, v2);
2126 }
2127 
2128 // CHECK-LABEL: @test_vcleq_f32(
2129 // CHECK:   [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
2130 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2131 // CHECK:   ret <4 x i32> [[SEXT_I]]
2132 uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
2133   return vcleq_f32(v1, v2);
2134 }
2135 
2136 // CHECK-LABEL: @test_vcleq_u8(
2137 // CHECK:   [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
2138 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2139 // CHECK:   ret <16 x i8> [[SEXT_I]]
2140 uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
2141   return vcleq_u8(v1, v2);
2142 }
2143 
2144 // CHECK-LABEL: @test_vcleq_u16(
2145 // CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
2146 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2147 // CHECK:   ret <8 x i16> [[SEXT_I]]
2148 uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
2149   return vcleq_u16(v1, v2);
2150 }
2151 
2152 // CHECK-LABEL: @test_vcleq_u32(
2153 // CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
2154 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2155 // CHECK:   ret <4 x i32> [[SEXT_I]]
2156 uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
2157   return vcleq_u32(v1, v2);
2158 }
2159 
2160 // CHECK-LABEL: @test_vcleq_s64(
2161 // CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
2162 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2163 // CHECK:   ret <2 x i64> [[SEXT_I]]
2164 uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
2165   return vcleq_s64(v1, v2);
2166 }
2167 
2168 // CHECK-LABEL: @test_vcleq_u64(
2169 // CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
2170 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2171 // CHECK:   ret <2 x i64> [[SEXT_I]]
2172 uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
2173   return vcleq_u64(v1, v2);
2174 }
2175 
2176 // CHECK-LABEL: @test_vcleq_f64(
2177 // CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
2178 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2179 // CHECK:   ret <2 x i64> [[SEXT_I]]
2180 uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
2181   return vcleq_f64(v1, v2);
2182 }
2183 
2184 // CHECK-LABEL: @test_vcgt_s8(
2185 // CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
2186 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2187 // CHECK:   ret <8 x i8> [[SEXT_I]]
2188 uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
2189   return vcgt_s8(v1, v2);
2190 }
2191 
2192 // CHECK-LABEL: @test_vcgt_s16(
2193 // CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
2194 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2195 // CHECK:   ret <4 x i16> [[SEXT_I]]
2196 uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
2197   return vcgt_s16(v1, v2);
2198 }
2199 
2200 // CHECK-LABEL: @test_vcgt_s32(
2201 // CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
2202 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2203 // CHECK:   ret <2 x i32> [[SEXT_I]]
2204 uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
2205   return vcgt_s32(v1, v2);
2206 }
2207 
2208 // CHECK-LABEL: @test_vcgt_s64(
2209 // CHECK:   [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
2210 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2211 // CHECK:   ret <1 x i64> [[SEXT_I]]
2212 uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
2213   return vcgt_s64(a, b);
2214 }
2215 
2216 // CHECK-LABEL: @test_vcgt_u64(
2217 // CHECK:   [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
2218 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2219 // CHECK:   ret <1 x i64> [[SEXT_I]]
2220 uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
2221   return vcgt_u64(a, b);
2222 }
2223 
2224 // CHECK-LABEL: @test_vcgt_f32(
2225 // CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
2226 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2227 // CHECK:   ret <2 x i32> [[SEXT_I]]
2228 uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
2229   return vcgt_f32(v1, v2);
2230 }
2231 
2232 // CHECK-LABEL: @test_vcgt_f64(
2233 // CHECK:   [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
2234 // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2235 // CHECK:   ret <1 x i64> [[SEXT_I]]
2236 uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
2237   return vcgt_f64(a, b);
2238 }
2239 
2240 // CHECK-LABEL: @test_vcgt_u8(
2241 // CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
2242 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2243 // CHECK:   ret <8 x i8> [[SEXT_I]]
2244 uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
2245   return vcgt_u8(v1, v2);
2246 }
2247 
2248 // CHECK-LABEL: @test_vcgt_u16(
2249 // CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
2250 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2251 // CHECK:   ret <4 x i16> [[SEXT_I]]
2252 uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
2253   return vcgt_u16(v1, v2);
2254 }
2255 
2256 // CHECK-LABEL: @test_vcgt_u32(
2257 // CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
2258 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2259 // CHECK:   ret <2 x i32> [[SEXT_I]]
2260 uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
2261   return vcgt_u32(v1, v2);
2262 }
2263 
2264 // CHECK-LABEL: @test_vcgtq_s8(
2265 // CHECK:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
2266 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2267 // CHECK:   ret <16 x i8> [[SEXT_I]]
2268 uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
2269   return vcgtq_s8(v1, v2);
2270 }
2271 
2272 // CHECK-LABEL: @test_vcgtq_s16(
2273 // CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
2274 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2275 // CHECK:   ret <8 x i16> [[SEXT_I]]
2276 uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
2277   return vcgtq_s16(v1, v2);
2278 }
2279 
2280 // CHECK-LABEL: @test_vcgtq_s32(
2281 // CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
2282 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2283 // CHECK:   ret <4 x i32> [[SEXT_I]]
2284 uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
2285   return vcgtq_s32(v1, v2);
2286 }
2287 
2288 // CHECK-LABEL: @test_vcgtq_f32(
2289 // CHECK:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
2290 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2291 // CHECK:   ret <4 x i32> [[SEXT_I]]
2292 uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
2293   return vcgtq_f32(v1, v2);
2294 }
2295 
2296 // CHECK-LABEL: @test_vcgtq_u8(
2297 // CHECK:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
2298 // CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2299 // CHECK:   ret <16 x i8> [[SEXT_I]]
2300 uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
2301   return vcgtq_u8(v1, v2);
2302 }
2303 
2304 // CHECK-LABEL: @test_vcgtq_u16(
2305 // CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
2306 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2307 // CHECK:   ret <8 x i16> [[SEXT_I]]
2308 uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
2309   return vcgtq_u16(v1, v2);
2310 }
2311 
2312 // CHECK-LABEL: @test_vcgtq_u32(
2313 // CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
2314 // CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2315 // CHECK:   ret <4 x i32> [[SEXT_I]]
2316 uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
2317   return vcgtq_u32(v1, v2);
2318 }
2319 
2320 // CHECK-LABEL: @test_vcgtq_s64(
2321 // CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
2322 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2323 // CHECK:   ret <2 x i64> [[SEXT_I]]
2324 uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
2325   return vcgtq_s64(v1, v2);
2326 }
2327 
2328 // CHECK-LABEL: @test_vcgtq_u64(
2329 // CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
2330 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2331 // CHECK:   ret <2 x i64> [[SEXT_I]]
2332 uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
2333   return vcgtq_u64(v1, v2);
2334 }
2335 
2336 // CHECK-LABEL: @test_vcgtq_f64(
2337 // CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
2338 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2339 // CHECK:   ret <2 x i64> [[SEXT_I]]
2340 uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
2341   return vcgtq_f64(v1, v2);
2342 }
2343 
2344 // CHECK-LABEL: @test_vclt_s8(
2345 // CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
2346 // CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2347 // CHECK:   ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// LT condition predicate implemented as GT, so check reversed operands.
// Using registers other than v0, v1 is possible, but would be odd.
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}
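
// Illustration only, not part of the FileCheck-verified output: per the note
// above, vclt(a, b) is the same lane-wise predicate as vcgt(b, a), so both
// the IR and the CMGT-based codegen simply reverse the operands. The helper
// below is a hypothetical scalar model of one lane (a true lane is all-ones).
static inline uint8_t ref_vclt_lane_s8(int8_t a, int8_t b) {
  return (uint8_t)(b > a ? 0xFF : 0x00); // lt(a, b) expressed as gt(b, a)
}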

// CHECK-LABEL: @test_vclt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: @test_vclt_s64(
// CHECK:   [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: @test_vclt_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: @test_vclt_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s64(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u64(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f64(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}
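
// Illustration only, not part of the FileCheck-verified output: vhadd/vhaddq
// ("halving add") compute floor((a + b) / 2) per lane in double-width
// arithmetic, so the intermediate sum cannot wrap; vhsub below is the same
// idea for floor((a - b) / 2). Hypothetical scalar models of one lane,
// assuming arithmetic right shift of signed values as on AArch64:
static inline int8_t ref_vhadd_lane_s8(int8_t a, int8_t b) {
  return (int8_t)(((int16_t)a + (int16_t)b) >> 1); // floor((a + b) / 2)
}
static inline int8_t ref_vhsub_lane_s8(int8_t a, int8_t b) {
  return (int8_t)(((int16_t)a - (int16_t)b) >> 1); // floor((a - b) / 2)
}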

// CHECK-LABEL: @test_vhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}
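
// Illustration only, not part of the FileCheck-verified output: vrhadd
// ("rounding halving add") differs from vhadd by adding 1 before the shift,
// i.e. floor((a + b + 1) / 2). Hypothetical scalar model of one lane,
// assuming arithmetic right shift of signed values as on AArch64:
static inline int8_t ref_vrhadd_lane_s8(int8_t a, int8_t b) {
  return (int8_t)(((int16_t)a + (int16_t)b + 1) >> 1); // floor((a + b + 1) / 2)
}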

// CHECK-LABEL: @test_vrhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vrhadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vqadd_s8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}
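
// Illustration only, not part of the FileCheck-verified output: vqadd/vqsub
// are saturating, clamping the double-width result to the element type's
// range instead of wrapping ([INT8_MIN, INT8_MAX] here; [0, UINT8_MAX] for
// the unsigned forms). Hypothetical scalar model of one sqadd lane:
static inline int8_t ref_vqadd_lane_s8(int8_t a, int8_t b) {
  int16_t s = (int16_t)a + (int16_t)b; // widen so the sum cannot wrap
  if (s > INT8_MAX) return INT8_MAX;   // clamp positive overflow
  if (s < INT8_MIN) return INT8_MIN;   // clamp negative overflow
  return (int8_t)s;
}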

// CHECK-LABEL: @test_vqadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddq_s8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

// CHECK-LABEL: @test_vqsub_s8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: @test_vqsubq_s8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

// CHECK-LABEL: @test_vshl_s8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}
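
// Illustration only, not part of the FileCheck-verified output: the vshl
// family shifts each lane of the first operand by the *signed* per-lane
// count in the second operand; positive counts shift left, negative counts
// shift right (arithmetic for sshl, logical for ushl), which is why the
// unsigned intrinsics below still take a signed shift vector. Hypothetical
// scalar model of one sshl lane, assuming in-range counts (|b| < 8) and
// two's-complement wrap on the left shift, as on AArch64:
static inline int8_t ref_vshl_lane_s8(int8_t a, int8_t b) {
  return (int8_t)(b >= 0 ? a << b : a >> -b);
}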

// CHECK-LABEL: @test_vshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: @test_vshlq_s8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}

// CHECK-LABEL: @test_vqshl_s8(
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}
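
// Illustration only, not part of the FileCheck-verified output: vqshl is the
// saturating variant of vshl; a left shift that would overflow the element
// type is clamped instead of wrapped, while negative counts still shift
// right. Hypothetical scalar model of one sqshl lane, assuming in-range
// counts (|b| < 8):
static inline int8_t ref_vqshl_lane_s8(int8_t a, int8_t b) {
  if (b < 0)
    return (int8_t)(a >> -b);             // negative count: arithmetic right shift
  int16_t s = (int16_t)((int16_t)a << b); // widen so the shift cannot wrap
  if (s > INT8_MAX) return INT8_MAX;      // clamp positive overflow
  if (s < INT8_MIN) return INT8_MIN;      // clamp negative overflow
  return (int8_t)s;
}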
3281 
3282 // CHECK-LABEL: @test_vqshl_s16(
3283 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3284 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3285 // CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3286 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3287 // CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
test_vqshl_s16(int16x4_t a,int16x4_t b)3288 int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
3289   return vqshl_s16(a, b);
3290 }
3291 
3292 // CHECK-LABEL: @test_vqshl_s32(
3293 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3294 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3295 // CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3296 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3297 // CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
test_vqshl_s32(int32x2_t a,int32x2_t b)3298 int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
3299   return vqshl_s32(a, b);
3300 }
3301 
3302 // CHECK-LABEL: @test_vqshl_s64(
3303 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3304 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3305 // CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3306 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3307 // CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
test_vqshl_s64(int64x1_t a,int64x1_t b)3308 int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
3309   return vqshl_s64(a, b);
3310 }
3311 
3312 // CHECK-LABEL: @test_vqshl_u8(
3313 // CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3314 // CHECK:   ret <8 x i8> [[VQSHL_V_I]]
test_vqshl_u8(uint8x8_t a,int8x8_t b)3315 uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
3316   return vqshl_u8(a, b);
3317 }
3318 
3319 // CHECK-LABEL: @test_vqshl_u16(
3320 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3321 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3322 // CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3323 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3324 // CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
test_vqshl_u16(uint16x4_t a,int16x4_t b)3325 uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
3326   return vqshl_u16(a, b);
3327 }
3328 
3329 // CHECK-LABEL: @test_vqshl_u32(
3330 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3331 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3332 // CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3333 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3334 // CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
test_vqshl_u32(uint32x2_t a,int32x2_t b)3335 uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
3336   return vqshl_u32(a, b);
3337 }
3338 
3339 // CHECK-LABEL: @test_vqshl_u64(
3340 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3341 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3342 // CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3343 // CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3344 // CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
test_vqshl_u64(uint64x1_t a,int64x1_t b)3345 uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
3346   return vqshl_u64(a, b);
3347 }
3348 
3349 // CHECK-LABEL: @test_vqshlq_s8(
3350 // CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3351 // CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
test_vqshlq_s8(int8x16_t a,int8x16_t b)3352 int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
3353   return vqshlq_s8(a, b);
3354 }
3355 
3356 // CHECK-LABEL: @test_vqshlq_s16(
3357 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

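// vrshl/vrshlq: rounding shift left. The per-lane shift count in b is signed
// even for the unsigned variants; a negative count shifts right with rounding,
// i.e. for count -n the result is roughly (a + (1 << (n - 1))) >> n, so a lane
// value of 5 shifted by -1 yields 3 rather than the truncated 2.
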
// CHECK-LABEL: @test_vrshl_s8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: @test_vrshlq_s8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}

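// vqrshl/vqrshlq: saturating rounding shift left, combining the rounding of
// vrshl with the saturation of vqshl. E.g. in an int8 lane, 100 shifted by 1
// saturates to 127, while 5 shifted by -1 rounds to 3.
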
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

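// vsli_n: shift left and insert. Each lane of b is shifted left by the
// immediate and written into the corresponding lane of a, preserving only the
// low n bits of a; with n == 0, as tested here, the result is simply b.
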
// CHECK-LABEL: @test_vsli_n_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: @test_vsliq_n_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}

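// vmax/vmin and their q-forms: lane-wise maximum/minimum. The floating-point
// variants map to FMAX/FMIN, which return NaN for a lane if either input lane
// is NaN.
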
// CHECK-LABEL: @test_vmax_s8(
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_s8(
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}

// CHECK-LABEL: @test_vmin_s8(
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: @test_vmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: @test_vmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: @test_vmin_u8(
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: @test_vmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: @test_vminq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}

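// vmaxnm/vminnm: lane-wise maxNum/minNum as defined by IEEE 754-2008; unlike
// vmax/vmin, if exactly one operand of a lane is a quiet NaN the numeric
// operand is returned.
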
// CHECK-LABEL: @test_vmaxnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}

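// vpmax/vpmin: pairwise maximum/minimum. Adjacent lanes of the concatenation
// a:b are reduced, so for the 8x8 forms, result[0] = max(a[0], a[1]), ...,
// result[4] = max(b[0], b[1]), and so on.
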
// CHECK-LABEL: @test_vpmax_s8(
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s8(
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u8(
// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}

// CHECK-LABEL: @test_vpmin_s8(
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_s8(
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}

// CHECK-LABEL: @test_vpminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}

// CHECK-LABEL: @test_vpminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}

// CHECK-LABEL: @test_vpminq_u8(
// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}

// CHECK-LABEL: @test_vpminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}

// CHECK-LABEL: @test_vpminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}

// CHECK-LABEL: @test_vpminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}

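// vpmaxnm/vpminnm: pairwise variants of vmaxnm/vminnm, i.e. pairwise
// reduction with IEEE 754-2008 maxNum/minNum NaN handling.
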
// CHECK-LABEL: @test_vpmaxnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vpminnm_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}

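// vpadd/vpaddq: pairwise add over the concatenation a:b. E.g. for vpadd_s8,
// result[i] = a[2i] + a[2i+1] for i < 4 and result[i] = b[2i-8] + b[2i-7]
// for i >= 4.
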
4357 // CHECK-LABEL: @test_vpadd_s8(
4358 // CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
4359 // CHECK:   ret <8 x i8> [[VPADD_V_I]]
test_vpadd_s8(int8x8_t a,int8x8_t b)4360 int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
4361   return vpadd_s8(a, b);
4362 }
4363 
4364 // CHECK-LABEL: @test_vpadd_s16(
4365 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4366 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4367 // CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
4368 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4369 // CHECK:   ret <4 x i16> [[VPADD_V2_I]]
test_vpadd_s16(int16x4_t a,int16x4_t b)4370 int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
4371   return vpadd_s16(a, b);
4372 }
4373 
4374 // CHECK-LABEL: @test_vpadd_s32(
4375 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4376 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4377 // CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
4378 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4379 // CHECK:   ret <2 x i32> [[VPADD_V2_I]]
test_vpadd_s32(int32x2_t a,int32x2_t b)4380 int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
4381   return vpadd_s32(a, b);
4382 }
4383 
4384 // CHECK-LABEL: @test_vpadd_u8(
4385 // CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
4386 // CHECK:   ret <8 x i8> [[VPADD_V_I]]
test_vpadd_u8(uint8x8_t a,uint8x8_t b)4387 uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
4388   return vpadd_u8(a, b);
4389 }
4390 
4391 // CHECK-LABEL: @test_vpadd_u16(
4392 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4393 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4394 // CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
4395 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4396 // CHECK:   ret <4 x i16> [[VPADD_V2_I]]
test_vpadd_u16(uint16x4_t a,uint16x4_t b)4397 uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
4398   return vpadd_u16(a, b);
4399 }
4400 
4401 // CHECK-LABEL: @test_vpadd_u32(
4402 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4403 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4404 // CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
4405 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4406 // CHECK:   ret <2 x i32> [[VPADD_V2_I]]
test_vpadd_u32(uint32x2_t a,uint32x2_t b)4407 uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
4408   return vpadd_u32(a, b);
4409 }
4410 
4411 // CHECK-LABEL: @test_vpadd_f32(
4412 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4413 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4414 // CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
4415 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
4416 // CHECK:   ret <2 x float> [[VPADD_V2_I]]
test_vpadd_f32(float32x2_t a,float32x2_t b)4417 float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
4418   return vpadd_f32(a, b);
4419 }
4420 
4421 // CHECK-LABEL: @test_vpaddq_s8(
4422 // CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
4423 // CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
test_vpaddq_s8(int8x16_t a,int8x16_t b)4424 int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
4425   return vpaddq_s8(a, b);
4426 }
4427 
4428 // CHECK-LABEL: @test_vpaddq_s16(
4429 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4430 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VPADDQ_V2_I]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  return vpaddq_s16(a, b);
}

// CHECK-LABEL: @test_vpaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VPADDQ_V2_I]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  return vpaddq_s32(a, b);
}

// CHECK-LABEL: @test_vpaddq_u8(
// CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vpaddq_u8(a, b);
}

// CHECK-LABEL: @test_vpaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VPADDQ_V2_I]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vpaddq_u16(a, b);
}

// CHECK-LABEL: @test_vpaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VPADDQ_V2_I]]
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vpaddq_u32(a, b);
}

// CHECK-LABEL: @test_vpaddq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VPADDQ_V2_I]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  return vpaddq_f32(a, b);
}

// CHECK-LABEL: @test_vpaddq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x double> [[VPADDQ_V2_I]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
  return vpaddq_f64(a, b);
}

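// vqdmulh[q]: signed saturating doubling multiply returning the high half;
// expected to lower directly to @llvm.aarch64.neon.sqdmulh.* as checked below.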
// CHECK-LABEL: @test_vqdmulh_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulh_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}

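// vqrdmulh[q]: like vqdmulh but with rounding of the high half;
// lowers to @llvm.aarch64.neon.sqrdmulh.*.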
// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}

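// vmulx[q]: floating-point multiply extended (FMULX), which returns 2.0 rather
// than NaN for 0 * infinity; lowers to @llvm.aarch64.neon.fmulx.*.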
// CHECK-LABEL: @test_vmulx_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
  return vmulx_f32(a, b);
}

// CHECK-LABEL: @test_vmulxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
  return vmulxq_f32(a, b);
}

// CHECK-LABEL: @test_vmulxq_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK:   ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
  return vmulxq_f64(a, b);
}

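// vshl_n[q]: shift left by an immediate; codegen is a plain IR shl by a
// splatted constant vector, with bitcasts through <8 x i8>/<16 x i8> for the
// non-i8 element types.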
// CHECK-LABEL: @test_vshl_n_s8(
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 3);
}

// CHECK-LABEL: @test_vshl_n_u8(
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshl_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshl_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 3);
}

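// vshr_n[q]: shift right by an immediate; ashr for signed element types,
// lshr for unsigned ones.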
// CHECK-LABEL: @test_vshr_n_s8(
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshr_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshr_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK:   [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 3);
}

// CHECK-LABEL: @test_vshr_n_u8(
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshr_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshr_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 3);
}

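// vsra_n[q]: shift right by an immediate and accumulate; the ashr/lshr result
// is added to the first (accumulator) operand.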
// CHECK-LABEL: @test_vsra_n_s8(
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s8(
// CHECK:   [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_u8(
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u8(
// CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 3);
}

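// vrshr_n[q]: rounding shift right by an immediate; emitted as the [su]rshl
// intrinsic with a negated splat shift amount.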
// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 3);
}

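// vrsra_n[q]: rounding shift right and accumulate; [su]rshl with a negated
// splat amount, followed by an add into the accumulator.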
// CHECK-LABEL: @test_vrsra_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vrsra_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vrsra_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}

// CHECK-LABEL: @test_vrsra_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vrsra_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i16> [[TMP3]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vrsra_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i32> [[TMP3]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <8 x i16> [[TMP3]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i32> [[TMP3]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vrsraq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i64> [[TMP3]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 3);
}

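// vsri_n[q]: shift right and insert; signed, unsigned and polynomial variants
// all go through @llvm.aarch64.neon.vsri.* with the immediate passed as i32.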
// CHECK-LABEL: @test_vsri_n_s8(
// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsri_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSRI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsri_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSRI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_s8(
// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSRI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSRI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSRI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}

// CHECK-LABEL: @test_vsri_n_u8(
// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSRI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsri_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSRI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsri_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSRI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_u8(
// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSRI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSRI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSRI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSRI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 3);
}

// CHECK-LABEL: @test_vsri_n_p8(
// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}

// CHECK-LABEL: @test_vsri_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
// CHECK:   ret <4 x i16> [[VSRI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}

// CHECK-LABEL: @test_vsriq_n_p8(
// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}

// CHECK-LABEL: @test_vsriq_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
// CHECK:   ret <8 x i16> [[VSRI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}

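// vsli_n[q]: shift left and insert; same pattern as vsri, via
// @llvm.aarch64.neon.vsli.*.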
// CHECK-LABEL: @test_vsli_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK:   ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}

// CHECK-LABEL: @test_vsli_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}

// CHECK-LABEL: @test_vsliq_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK:   ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}

// CHECK-LABEL: @test_vsliq_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5602 // CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
5603 // CHECK:   ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_p16(poly16x8_t a,poly16x8_t b)5604 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
5605   return vsliq_n_p16(a, b, 15);
5606 }
5607 
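// Editorial note, not part of the original test: SLI ("shift left and
// insert") shifts each lane of the second operand left by the immediate and
// inserts it into the first operand, preserving the destination's low n bits.
// A minimal scalar sketch of one 8-bit lane, following the Arm reference
// semantics; the helper name sli_ref_u8 is hypothetical and, being an unused
// static inline function, emits no IR and does not disturb the CHECK lines:
static inline uint8_t sli_ref_u8(uint8_t a, uint8_t b, unsigned n) {
  uint8_t inserted = (uint8_t)(0xFFu << n);  // bit positions taken from b << n
  return (uint8_t)(((b << n) & inserted) | (a & ~inserted));
}
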
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
// CHECK:   ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK:   ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
// CHECK:   ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
// CHECK:   ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
// CHECK:   ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}

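// Editorial note, not part of the original test: SQSHLU shifts a signed input
// left and saturates into the *unsigned* range of the lane, which is why the
// tests above take the signed vector types but return the unsigned ones. A
// scalar sketch of one 8-bit lane (hypothetical helper; immediate range 0..7
// assumed):
static inline uint8_t sqshlu_ref_s8(int8_t a, unsigned n) {
  if (a < 0)
    return 0;              // negative inputs saturate to 0
  int shifted = a << n;    // promoted to int, so the shift cannot overflow here
  return shifted > 0xFF ? 0xFF : (uint8_t)shifted;
}
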
// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}

// CHECK-LABEL: @test_vshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}

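// Editorial note, not part of the original test: as the IR above shows,
// vshrn_n_* needs no target intrinsic at all -- it is an arithmetic (signed)
// or logical (unsigned) shift followed by a trunc to the half-width lane, and
// the *_high_* forms then concatenate the narrowed half onto %a with a
// shufflevector. A scalar sketch of one signed lane (hypothetical helper):
static inline int8_t shrn_ref_s16(int16_t a, unsigned n) {
  return (int8_t)(a >> n);  // shift right, then truncate to 8 bits
}
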
// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrun_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrun_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrun_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}

// CHECK-LABEL: @test_vrshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vrshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vrshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vrshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}

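// Editorial note, not part of the original test: the rounding narrows
// (vrshrn_n_*) differ from the plain vshrn_n_* forms only in adding the
// rounding constant 1 << (n-1) before shifting. A scalar sketch of one signed
// lane, widened so the rounding add cannot overflow (hypothetical helper;
// n >= 1 as the immediate range requires):
static inline int8_t rshrn_ref_s16(int16_t a, unsigned n) {
  int32_t rounded = (int32_t)a + (1 << (n - 1));
  return (int8_t)(rounded >> n);
}
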
// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrun_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}

// CHECK-LABEL: @test_vqshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vqshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vqshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}

// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrn_high_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: @test_vqrshrn_high_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}

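// Editorial note, not part of the original test: the q-prefixed narrows
// (vqshrun, vqshrn, vqrshrun, vqrshrn) saturate the shifted value to the
// destination range instead of truncating it, and the *un* forms clamp a
// signed source into an unsigned lane. A scalar sketch of the vqshrun_n_s16
// case for one lane (hypothetical helper):
static inline uint8_t sqshrun_ref_s16(int16_t a, unsigned n) {
  int32_t v = (int32_t)a >> n;          // arithmetic shift right
  if (v < 0)
    return 0;                           // saturate below
  return v > 0xFF ? 0xFF : (uint8_t)v;  // saturate above
}
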
// CHECK-LABEL: @test_vshll_n_s8(
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}

// CHECK-LABEL: @test_vshll_high_n_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 19);
}

// CHECK-LABEL: @test_vshll_high_n_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 19);
}

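// Editorial note, not part of the original test: vshll_n_* widens each lane
// first (the sext/zext in the IR above) and only then shifts, so the shift
// can never discard bits; the *_high_* forms widen the upper half selected by
// the leading shufflevector. A scalar sketch of one signed lane (hypothetical
// helper):
static inline int16_t shll_ref_s8(int8_t a, unsigned n) {
  return (int16_t)((int16_t)a << n);  // widen to 16 bits, then shift
}
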
// CHECK-LABEL: @test_vmovl_s8(
// CHECK:   [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: @test_vmovl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: @test_vmovl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: @test_vmovl_u8(
// CHECK:   [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: @test_vmovl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: @test_vmovl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

// CHECK-LABEL: @test_vmovl_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
  return vmovl_high_s8(a);
}

// CHECK-LABEL: @test_vmovl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
  return vmovl_high_s16(a);
}

// CHECK-LABEL: @test_vmovl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP1]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
  return vmovl_high_s32(a);
}

// CHECK-LABEL: @test_vmovl_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
  return vmovl_high_u8(a);
}

// CHECK-LABEL: @test_vmovl_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
  return vmovl_high_u16(a);
}

// CHECK-LABEL: @test_vmovl_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP1]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
  return vmovl_high_u32(a);
}

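// Editorial note, not part of the original test: vmovl_* is the pure widening
// move, i.e. the degenerate vshll_n_*(a, 0) case, so it lowers to a bare
// sext/zext with no shift at all; the *_high_* forms widen the upper half
// selected by the shufflevector.
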
// CHECK-LABEL: @test_vcvt_n_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
  return vcvtq_n_f64_s64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
  return vcvtq_n_f64_u64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK:   ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}

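// vaddl_*: widening add. Both 64-bit operands are sign- or zero-extended to
// twice the element width before the add, so the sum cannot wrap. Roughly,
// for the s8 case: r[i] = (int16_t)a[i] + (int16_t)b[i].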
// CHECK-LABEL: @test_vaddl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

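// vaddl_high_*: the same widening add, taken from the upper halves of two
// 128-bit inputs; the shufflevector below extracts lanes n/2..n-1 first.
// The remaining _high tests in this file follow the same extract-then-widen
// pattern.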
// CHECK-LABEL: @test_vaddl_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vaddl_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vaddl_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vaddl_high_u32(a, b);
}

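// vaddw_*: widening add in which only the second, narrower operand is
// extended; %a already has the result width.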
// CHECK-LABEL: @test_vaddw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

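// vaddw_high_*: as vaddw, after extracting the upper half of %b.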
// CHECK-LABEL: @test_vaddw_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}

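// vsubl_* (and vsubl_high_* below): widening subtract, mirroring vaddl with
// sub in place of add.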
// CHECK-LABEL: @test_vsubl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}

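// vsubw_* / vsubw_high_*: widening subtract with only %b extended, mirroring
// vaddw.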
// CHECK-LABEL: @test_vsubw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u8(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u16(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u32(
// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}

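// vaddhn_*: add and narrow, keeping the high half of each widened lane. This
// lowers to plain add + lshr + trunc rather than a target intrinsic.
// Informally, for the s16 case: r[i] = (int8_t)((a[i] + b[i]) >> 8).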
// CHECK-LABEL: @test_vaddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: @test_vaddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: @test_vaddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: @test_vaddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: @test_vaddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: @test_vaddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

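// vaddhn_high_*: compute the same narrowed sum, then concatenate it onto %r
// with a shufflevector so it fills the upper half of the 128-bit result.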
// CHECK-LABEL: @test_vaddhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}

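// vraddhn_*: rounding add-and-narrow, kept as a call to
// @llvm.aarch64.neon.raddhn.*. Informally it adds the rounding constant
// 1 << (shift - 1) before the high-half shift, e.g. for the s16 case:
//   r[i] = (int8_t)((a[i] + b[i] + (1 << 7)) >> 8);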
// CHECK-LABEL: @test_vraddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vraddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}

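// vsubhn_*: subtract and narrow to the high half; the sub/lshr/trunc
// counterpart of vaddhn above.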
// CHECK-LABEL: @test_vsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}

// CHECK-LABEL: @test_vsubhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vsubhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}

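// vrsubhn_* and vrsubhn_high_*: rounding subtract-and-narrow via
// @llvm.aarch64.neon.rsubhn.*, analogous to vraddhn above.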
// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

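// vrsubhn_high_<type>: the same rounding narrow, packed into the high half of
// the destination; the shufflevector concatenates %r with the narrowed result.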
// CHECK-LABEL: @test_vrsubhn_high_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}

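// vabdl_<type>: absolute-difference long; sabd/uabd on the narrow elements,
// then a zext to the double-width result.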
// CHECK-LABEL: @test_vabdl_s8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_u8(
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}

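// vabal_<type>: absolute-difference and accumulate long; the widened
// sabd/uabd result is added to the accumulator %a with a plain IR add.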
// CHECK-LABEL: @test_vabal_s8(
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_u8(
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

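// vabdl_high_<type>: as vabdl, but on the high halves of the 128-bit sources,
// extracted first with shufflevector.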
// CHECK-LABEL: @test_vabdl_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}

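// vabal_high_<type>: high-half extraction, absolute-difference long, then
// accumulation into %a.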
// CHECK-LABEL: @test_vabal_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}

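// vmull_<type>: widening multiply, lowered to @llvm.aarch64.neon.smull/umull.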
// CHECK-LABEL: @test_vmull_s8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: @test_vmull_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: @test_vmull_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: @test_vmull_u8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: @test_vmull_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: @test_vmull_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

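// vmull_high_<type>: widening multiply of the high halves of the sources.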
// CHECK-LABEL: @test_vmull_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}

// CHECK-LABEL: @test_vmull_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}

// CHECK-LABEL: @test_vmull_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}

// CHECK-LABEL: @test_vmull_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}

// CHECK-LABEL: @test_vmull_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}

// CHECK-LABEL: @test_vmull_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}

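// vmlal_<type>: widening multiply-accumulate; smull/umull followed by a plain
// IR add into the accumulator %a.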
// CHECK-LABEL: @test_vmlal_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

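// vmlal_high_<type>: the same multiply-accumulate applied to the high halves.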
// CHECK-LABEL: @test_vmlal_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}

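// vmlsl_<type>: widening multiply-subtract; smull/umull followed by an IR sub
// from the accumulator %a.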
// CHECK-LABEL: @test_vmlsl_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

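// vmlsl_high_<type>: the same multiply-subtract applied to the high halves.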
// CHECK-LABEL: @test_vmlsl_high_s8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_u8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_u16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_high_u32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}

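// vqdmull: saturating doubling multiply long (sqdmull); vqdmlal and vqdmlsl
// chain it into a saturating accumulate (sqadd) or subtract (sqsub).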
// CHECK-LABEL: @test_vqdmull_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

// CHECK-LABEL: @test_vqdmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

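// The _high variants of the saturating doubling ops extract the upper halves
// with shufflevector before the sqdmull and the saturating accumulate/subtract.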
8285 // CHECK-LABEL: @test_vqdmull_high_s16(
8286 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8287 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8288 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8289 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8290 // CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8291 // CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
8292 // CHECK:   ret <4 x i32> [[VQDMULL_V2_I_I]]
test_vqdmull_high_s16(int16x8_t a,int16x8_t b)8293 int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
8294   return vqdmull_high_s16(a, b);
8295 }
8296 
8297 // CHECK-LABEL: @test_vqdmull_high_s32(
8298 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8299 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8300 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8301 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8302 // CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8303 // CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
8304 // CHECK:   ret <2 x i64> [[VQDMULL_V2_I_I]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}

// CHECK-LABEL: @test_vqdmlal_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_high_s16(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_high_s32(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}

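// The vmull_p8 tests below cover polynomial (carry-less) multiplication:
// PMULL widens poly8x8_t operands to a poly16x8_t result, and the _high
// variant consumes the upper halves of 128-bit inputs.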
// CHECK-LABEL: @test_vmull_p8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

// CHECK-LABEL: @test_vmull_high_p8(
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK:   ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}

// CHECK-LABEL: @test_vaddd_s64(
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}

// CHECK-LABEL: @test_vaddd_u64(
// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK:   ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}

// CHECK-LABEL: @test_vsubd_s64(
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}

// CHECK-LABEL: @test_vsubd_u64(
// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK:   ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}

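// The scalar saturating add/subtract tests below use the AArch64 lane-suffix
// convention: b, h, s and d name 8-, 16-, 32- and 64-bit operands. The 8- and
// 16-bit forms are lowered through one-lane vector insert/extract because the
// corresponding LLVM intrinsics only exist at vector (and i32/i64) types.
// Saturation clamps at the type bounds instead of wrapping, e.g.
// vqaddb_s8(120, 100) yields 127.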
// CHECK-LABEL: @test_vqaddb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}

// CHECK-LABEL: @test_vqaddh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}

// CHECK-LABEL: @test_vqadds_s32(
// CHECK:   [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}

// CHECK-LABEL: @test_vqaddd_s64(
// CHECK:   [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}

// CHECK-LABEL: @test_vqaddb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}

// CHECK-LABEL: @test_vqaddh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}

// CHECK-LABEL: @test_vqadds_u32(
// CHECK:   [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}

// CHECK-LABEL: @test_vqaddd_u64(
// CHECK:   [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}

// CHECK-LABEL: @test_vqsubb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: @test_vqsubh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: @test_vqsubs_s32(
// CHECK:   [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: @test_vqsubd_s64(
// CHECK:   [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: @test_vqsubb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: @test_vqsubh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: @test_vqsubs_u32(
// CHECK:   [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: @test_vqsubd_u64(
// CHECK:   [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}

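// The scalar shift tests below take a signed, variable shift count: a
// negative count shifts right, matching the SSHL/USHL instruction
// semantics. The q prefix adds saturation and the r prefix adds rounding.
// A minimal usage sketch (hypothetical helper, kept static inline and
// unreferenced so it emits no IR and leaves the checked output untouched):
static inline int64_t example_sshr2(int64_t a) {
  return vshld_s64(a, -2); // negative count: arithmetic shift right by 2
}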
// CHECK-LABEL: @test_vshld_s64(
// CHECK:   [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: @test_vshld_u64(
// CHECK:   [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}

// CHECK-LABEL: @test_vqshlb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqshlh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqshls_s32(
// CHECK:   [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: @test_vqshld_s64(
// CHECK:   [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: @test_vqshlb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqshlh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqshls_u32(
// CHECK:   [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: @test_vqshld_u64(
// CHECK:   [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}

// CHECK-LABEL: @test_vrshld_s64(
// CHECK:   [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}

// CHECK-LABEL: @test_vrshld_u64(
// CHECK:   [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqrshls_s32(
// CHECK:   [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: @test_vqrshld_s64(
// CHECK:   [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqrshls_u32(
// CHECK:   [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: @test_vqrshld_u64(
// CHECK:   [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}

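// The pairwise-add tests below reduce both lanes of a two-element vector to
// a single scalar. Note that vpaddd_s64 lowers to the unsigned uaddv
// reduction seen in its CHECK lines; that is sound because two's-complement
// addition is sign-agnostic.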
// CHECK-LABEL: @test_vpaddd_s64(
// CHECK:   [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: @test_vpadds_f32(
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: @test_vpaddd_f64(
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}

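// For the floating-point pairwise max/min below, the nm variants follow the
// IEEE-754 maxNum/minNum semantics (a quiet NaN operand is dropped in favour
// of the numeric one), while the plain variants propagate NaNs.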
// CHECK-LABEL: @test_vpmaxnms_f32(
// CHECK:   [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: @test_vpmaxnmqd_f64(
// CHECK:   [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmaxs_f32(
// CHECK:   [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: @test_vpmaxqd_f64(
// CHECK:   [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: @test_vpminnms_f32(
// CHECK:   [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: @test_vpminnmqd_f64(
// CHECK:   [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmins_f32(
// CHECK:   [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: @test_vpminqd_f64(
// CHECK:   [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}

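// vqdmulh and vqrdmulh below compute the saturating doubling multiply
// returning the high half, i.e. roughly sat((2*a*b) >> 16) for the h form;
// the r variant rounds before truncating.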
// CHECK-LABEL: @test_vqdmulhh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhs_s32(
// CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhs_s32(
// CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}

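// vmulx behaves like an ordinary floating-point multiply except that
// 0 * infinity returns +/-2.0 instead of NaN, which keeps the
// reciprocal-estimate algorithms exercised further down well defined at
// the edge cases.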
// CHECK-LABEL: @test_vmulxs_f32(
// CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
// CHECK:   ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: @test_vmulxd_f64(
// CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
// CHECK:   ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: @test_vmulx_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}

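// vrecps and vrsqrts below are the Newton-Raphson step primitives:
// vrecps(a, b) computes 2 - a*b and vrsqrts(a, b) computes (3 - a*b)/2.
// A minimal refinement sketch (hypothetical helper, kept static inline and
// unreferenced so it emits no IR and leaves the checked output untouched):
static inline float example_recip(float a) {
  float est = vrecpes_f32(a);        // coarse reciprocal estimate
  return est * vrecpss_f32(a, est);  // est * (2 - a*est): one step closer to 1/a
}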
// CHECK-LABEL: @test_vrecpss_f32(
// CHECK:   [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
// CHECK:   ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsd_f64(
// CHECK:   [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
// CHECK:   ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrtss_f32(
// CHECK:   [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
// CHECK:   ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsd_f64(
// CHECK:   [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
// CHECK:   ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}

// CHECK-LABEL: @test_vcvts_f32_s32(
// CHECK:   [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_s64(
// CHECK:   [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: @test_vcvts_f32_u32(
// CHECK:   [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_u64(
// CHECK:   [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}

// CHECK-LABEL: @test_vrecpes_f32(
// CHECK:   [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
// CHECK:   ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: @test_vrecped_f64(
// CHECK:   [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
// CHECK:   ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: @test_vrecpxs_f32(
// CHECK:   [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
// CHECK:   ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: @test_vrecpxd_f64(
// CHECK:   [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
// CHECK:   ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}

// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: @test_vrsqrtes_f32(
// CHECK:   [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
// CHECK:   ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: @test_vrsqrted_f64(
// CHECK:   [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
// CHECK:   ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}

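// The vld1 tests below check that single-vector loads lower to plain IR
// loads carrying the natural alignment of the element type (1 for bytes,
// 2 for halfwords, and so on), rather than to load intrinsics.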
// CHECK-LABEL: @test_vld1q_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
// CHECK:   ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: @test_vld1q_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: @test_vld1q_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: @test_vld1q_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: @test_vld1q_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
// CHECK:   ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: @test_vld1q_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: @test_vld1q_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: @test_vld1q_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: @test_vld1q_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK:   [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]], align 2
// CHECK:   ret <8 x half> [[TMP2]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: @test_vld1q_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: @test_vld1q_f64(
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK:   [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: @test_vld1q_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
// CHECK:   ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: @test_vld1q_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
// CHECK:   ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: @test_vld1_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
// CHECK:   ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: @test_vld1_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: @test_vld1_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: @test_vld1_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: @test_vld1_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
// CHECK:   ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: @test_vld1_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: @test_vld1_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: @test_vld1_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: @test_vld1_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
// CHECK:   [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]], align 2
// CHECK:   ret <4 x half> [[TMP2]]
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: @test_vld1_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: @test_vld1_f64(
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK:   [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]], align 8
// CHECK:   ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: @test_vld1_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
// CHECK:   ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: @test_vld1_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
// CHECK:   ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}

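// The _void variants pass the pointer as void *, so the load is emitted
// with align 1: with no element type to consult, only byte alignment can
// be assumed.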
// CHECK-LABEL: @test_vld1_u8_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
// CHECK:   ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8_void(void *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: @test_vld1_u16_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <4 x i16>*
// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
// CHECK:   ret <4 x i16> [[TMP1]]
uint16x4_t test_vld1_u16_void(void *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: @test_vld1_u32_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <2 x i32>*
// CHECK:   [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 1
// CHECK:   ret <2 x i32> [[TMP1]]
uint32x2_t test_vld1_u32_void(void *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: @test_vld1_u64_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <1 x i64>*
// CHECK:   [[TMP1:%.*]] = load <1 x i64>, <1 x i64>* [[TMP0]], align 1
// CHECK:   ret <1 x i64> [[TMP1]]
uint64x1_t test_vld1_u64_void(void *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: @test_vld1_s8_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
// CHECK:   ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8_void(void *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: @test_vld1_s16_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <4 x i16>*
// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
// CHECK:   ret <4 x i16> [[TMP1]]
int16x4_t test_vld1_s16_void(void *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: @test_vld1_s32_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <2 x i32>*
// CHECK:   [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 1
// CHECK:   ret <2 x i32> [[TMP1]]
int32x2_t test_vld1_s32_void(void *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: @test_vld1_s64_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <1 x i64>*
// CHECK:   [[TMP1:%.*]] = load <1 x i64>, <1 x i64>* [[TMP0]], align 1
// CHECK:   ret <1 x i64> [[TMP1]]
int64x1_t test_vld1_s64_void(void *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: @test_vld1_f16_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <4 x half>*
// CHECK:   [[TMP1:%.*]] = load <4 x half>, <4 x half>* [[TMP0]], align 1
// CHECK:   ret <4 x half> [[TMP1]]
float16x4_t test_vld1_f16_void(void *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: @test_vld1_f32_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <2 x float>*
// CHECK:   [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 1
// CHECK:   ret <2 x float> [[TMP1]]
float32x2_t test_vld1_f32_void(void *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: @test_vld1_f64_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <1 x double>*
// CHECK:   [[TMP1:%.*]] = load <1 x double>, <1 x double>* [[TMP0]], align 1
// CHECK:   ret <1 x double> [[TMP1]]
float64x1_t test_vld1_f64_void(void *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: @test_vld1_p8_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
// CHECK:   ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8_void(void *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: @test_vld1_p16_void(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <4 x i16>*
// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
// CHECK:   ret <4 x i16> [[TMP1]]
poly16x4_t test_vld1_p16_void(void *a) {
  return vld1_p16(a);
}

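// vld2 below performs a de-interleaving structure load (the LD2
// instruction): alternating elements land in val[0] and val[1] of the
// returned x2 struct. A minimal usage sketch (hypothetical helper, kept
// static inline and unreferenced so it emits no IR and leaves the checked
// output untouched):
static inline uint8x16_t example_even_bytes(const uint8_t *p) {
  uint8x16x2_t two = vld2q_u8(p); // two.val[0] = p[0],p[2],...; two.val[1] = p[1],p[3],...
  return two.val[0];
}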
9289 // CHECK-LABEL: @test_vld2q_u8(
9290 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
9291 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
9292 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9293 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9294 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9295 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9296 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9297 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
9298 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9299 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9300 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
9301 // CHECK:   ret %struct.uint8x16x2_t [[TMP5]]
test_vld2q_u8(uint8_t const * a)9302 uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
9303   return vld2q_u8(a);
9304 }
9305 
9306 // CHECK-LABEL: @test_vld2q_u16(
9307 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
9308 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
9309 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9310 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9311 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9312 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9313 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9314 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9315 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
9316 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9317 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9318 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
9319 // CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
test_vld2q_u16(uint16_t const * a)9320 uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
9321   return vld2q_u16(a);
9322 }
9323 
9324 // CHECK-LABEL: @test_vld2q_u32(
9325 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
9326 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
9327 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9328 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9329 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9330 // CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9331 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9332 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9333 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
9334 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9335 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9336 // CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
9337 // CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
test_vld2q_u32(uint32_t const * a)9338 uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
9339   return vld2q_u32(a);
9340 }
9341 
9342 // CHECK-LABEL: @test_vld2q_u64(
9343 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
9344 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
9345 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9346 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9347 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9348 // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9349 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9350 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9351 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
9352 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9353 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9354 // CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
9355 // CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
test_vld2q_u64(uint64_t const * a)9356 uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
9357   return vld2q_u64(a);
9358 }
9359 
9360 // CHECK-LABEL: @test_vld2q_s8(
9361 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
9362 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
9363 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9364 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9365 // CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9366 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9367 // CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9368 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
9369 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9370 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9371 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
9372 // CHECK:   ret %struct.int8x16x2_t [[TMP5]]
test_vld2q_s8(int8_t const * a)9373 int8x16x2_t test_vld2q_s8(int8_t const *a) {
9374   return vld2q_s8(a);
9375 }
9376 
9377 // CHECK-LABEL: @test_vld2q_s16(
9378 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
9379 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
9380 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9381 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
9382 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9383 // CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9384 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9385 // CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9386 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
9387 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9388 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9389 // CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
9390 // CHECK:   ret %struct.int16x8x2_t [[TMP6]]
test_vld2q_s16(int16_t const * a)9391 int16x8x2_t test_vld2q_s16(int16_t const *a) {
9392   return vld2q_s16(a);
9393 }
9394 
9395 // CHECK-LABEL: @test_vld2q_s32(
9396 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
9397 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
9398 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9399 // CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
9400 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9401 // CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9402 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9403 // CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9404 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
9405 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9406 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9407 // CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
9408 // CHECK:   ret %struct.int32x4x2_t [[TMP6]]
test_vld2q_s32(int32_t const * a)9409 int32x4x2_t test_vld2q_s32(int32_t const *a) {
9410   return vld2q_s32(a);
9411 }
9412 
9413 // CHECK-LABEL: @test_vld2q_s64(
9414 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
9415 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
9416 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9417 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
9418 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9419 // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9420 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9421 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9422 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
9423 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9424 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9425 // CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld2q_s64(int64_t const *a) {
  return vld2q_s64(a);
}

// CHECK-LABEL: @test_vld2q_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
// CHECK:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
// CHECK:   store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld2q_f16(float16_t const *a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: @test_vld2q_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld2q_f32(float32_t const *a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: @test_vld2q_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld2q_f64(float64_t const *a) {
  return vld2q_f64(a);
}

// CHECK-LABEL: @test_vld2q_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x2_t [[TMP5]]
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: @test_vld2q_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
  return vld2q_p16(a);
}

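// The 64-bit (d-register) vld2 variants below follow the same pattern as the
// q-register forms above, but the two-vector aggregate is 8-byte aligned and
// 16 bytes in total, so the copy out of the sret temporary is a 16-byte
// memcpy.
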
// CHECK-LABEL: @test_vld2_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x2_t [[TMP5]]
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
  return vld2_u8(a);
}

// CHECK-LABEL: @test_vld2_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
  return vld2_u16(a);
}

// CHECK-LABEL: @test_vld2_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
  return vld2_u32(a);
}

// CHECK-LABEL: @test_vld2_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
  return vld2_u64(a);
}

// CHECK-LABEL: @test_vld2_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x2_t [[TMP5]]
int8x8x2_t test_vld2_s8(int8_t const *a) {
  return vld2_s8(a);
}

// CHECK-LABEL: @test_vld2_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_s16(int16_t const *a) {
  return vld2_s16(a);
}

// CHECK-LABEL: @test_vld2_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_s32(int32_t const *a) {
  return vld2_s32(a);
}

// CHECK-LABEL: @test_vld2_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_s64(int64_t const *a) {
  return vld2_s64(a);
}

// CHECK-LABEL: @test_vld2_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
// CHECK:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
// CHECK:   store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_f16(float16_t const *a) {
  return vld2_f16(a);
}

// CHECK-LABEL: @test_vld2_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_f32(float32_t const *a) {
  return vld2_f32(a);
}

// CHECK-LABEL: @test_vld2_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld2_f64(float64_t const *a) {
  return vld2_f64(a);
}

// CHECK-LABEL: @test_vld2_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x2_t [[TMP5]]
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
  return vld2_p8(a);
}

// CHECK-LABEL: @test_vld2_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
  return vld2_p16(a);
}

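// vld3/vld3q perform a de-interleaving structure load (the AArch64 LD3
// instruction): three vectors are filled from consecutive interleaved
// elements, so the q forms below copy a 48-byte three-vector aggregate out of
// the sret temporary and the d forms a 24-byte one.
//
// A minimal usage sketch (an illustration added here, not part of the checked
// output): vld3q_u8 splits 48 packed RGB bytes into per-channel vectors. The
// helper is static, inline, and never referenced, so Clang emits no IR for it
// and the CHECK patterns below are unaffected.
static inline uint8x16_t sketch_red_channel(uint8_t const *rgb) {
  uint8x16x3_t pix = vld3q_u8(rgb); // de-interleave {r,g,b} triples
  return pix.val[0];                // the 16 red components
}
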
// CHECK-LABEL: @test_vld3q_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x3_t [[TMP5]]
uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
  return vld3q_u8(a);
}

// CHECK-LABEL: @test_vld3q_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
  return vld3q_u16(a);
}

// CHECK-LABEL: @test_vld3q_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
  return vld3q_u32(a);
}

// CHECK-LABEL: @test_vld3q_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
  return vld3q_u64(a);
}

// CHECK-LABEL: @test_vld3q_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x3_t [[TMP5]]
int8x16x3_t test_vld3q_s8(int8_t const *a) {
  return vld3q_s8(a);
}

// CHECK-LABEL: @test_vld3q_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld3q_s16(int16_t const *a) {
  return vld3q_s16(a);
}

// CHECK-LABEL: @test_vld3q_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld3q_s32(int32_t const *a) {
  return vld3q_s32(a);
}

// CHECK-LABEL: @test_vld3q_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld3q_s64(int64_t const *a) {
  return vld3q_s64(a);
}

// CHECK-LABEL: @test_vld3q_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
// CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld3q_f16(float16_t const *a) {
  return vld3q_f16(a);
}

// CHECK-LABEL: @test_vld3q_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld3q_f32(float32_t const *a) {
  return vld3q_f32(a);
}

// CHECK-LABEL: @test_vld3q_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld3q_f64(float64_t const *a) {
  return vld3q_f64(a);
}

// CHECK-LABEL: @test_vld3q_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x3_t [[TMP5]]
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
  return vld3q_p8(a);
}

// CHECK-LABEL: @test_vld3q_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
  return vld3q_p16(a);
}

// CHECK-LABEL: @test_vld3_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x3_t [[TMP5]]
uint8x8x3_t test_vld3_u8(uint8_t const *a) {
  return vld3_u8(a);
}

// CHECK-LABEL: @test_vld3_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
  return vld3_u16(a);
}

// CHECK-LABEL: @test_vld3_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
  return vld3_u32(a);
}

// CHECK-LABEL: @test_vld3_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
  return vld3_u64(a);
}

// CHECK-LABEL: @test_vld3_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x3_t [[TMP5]]
int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}

// CHECK-LABEL: @test_vld3_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}

// CHECK-LABEL: @test_vld3_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}

// CHECK-LABEL: @test_vld3_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}

// CHECK-LABEL: @test_vld3_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}

// CHECK-LABEL: @test_vld3_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}

// CHECK-LABEL: @test_vld3_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}

// CHECK-LABEL: @test_vld3_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}

// CHECK-LABEL: @test_vld3_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}

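// vld4/vld4q load four interleaved vectors at once (the AArch64 LD4
// instruction); the q forms below return a 64-byte four-vector aggregate via
// the same sret-then-memcpy pattern checked above for vld2 and vld3.
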
10213 // CHECK-LABEL: @test_vld4q_u8(
10214 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
10215 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
10216 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10217 // CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10218 // CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10219 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10220 // CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10221 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
10222 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10223 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10224 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
10225 // CHECK:   ret %struct.uint8x16x4_t [[TMP5]]
test_vld4q_u8(uint8_t const * a)10226 uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
10227   return vld4q_u8(a);
10228 }
10229 
10230 // CHECK-LABEL: @test_vld4q_u16(
10231 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
10232 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
10233 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10234 // CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10235 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10236 // CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10237 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10238 // CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10239 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
10240 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10241 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10242 // CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
10243 // CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
test_vld4q_u16(uint16_t const * a)10244 uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
10245   return vld4q_u16(a);
10246 }
10247 
// CHECK-LABEL: @test_vld4q_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: @test_vld4q_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
  return vld4q_u64(a);
}

// CHECK-LABEL: @test_vld4q_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x4_t [[TMP5]]
int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: @test_vld4q_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld4q_s16(int16_t const *a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: @test_vld4q_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: @test_vld4q_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}

// CHECK-LABEL: @test_vld4q_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
// CHECK:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
// CHECK:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: @test_vld4q_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: @test_vld4q_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}

// CHECK-LABEL: @test_vld4q_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: @test_vld4q_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}

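// The vld4 tests below exercise the 64-bit (D-register) forms: each loads
// four interleaved vectors from memory, de-interleaves them, and returns
// them packed in the corresponding NxMx4 struct type.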
// CHECK-LABEL: @test_vld4_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}

// CHECK-LABEL: @test_vld4_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}

// CHECK-LABEL: @test_vld4_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}

// CHECK-LABEL: @test_vld4_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}

// CHECK-LABEL: @test_vld4_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}

// CHECK-LABEL: @test_vld4_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}

// CHECK-LABEL: @test_vld4_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}

// CHECK-LABEL: @test_vld4_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}

// CHECK-LABEL: @test_vld4_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
// CHECK:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
// CHECK:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}

// CHECK-LABEL: @test_vld4_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}

// CHECK-LABEL: @test_vld4_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}

// CHECK-LABEL: @test_vld4_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}

// CHECK-LABEL: @test_vld4_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}

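// The vst1[q] tests store a single vector to memory. For byte element types
// the store is emitted directly; other element types are bitcast through i8*
// and a byte vector before the store.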
// CHECK-LABEL: @test_vst1q_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: @test_vst1q_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: @test_vst1q_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: @test_vst1q_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: @test_vst1q_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: @test_vst1q_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: @test_vst1q_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: @test_vst1q_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: @test_vst1q_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK:   store <8 x half> [[TMP3]], <8 x half>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: @test_vst1q_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: @test_vst1q_f64(
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}

// CHECK-LABEL: @test_vst1q_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: @test_vst1q_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}

// CHECK-LABEL: @test_vst1_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: @test_vst1_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: @test_vst1_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
// CHECK:   ret void
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: @test_vst1_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK:   ret void
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
  vst1_u64(a, b);
}

// CHECK-LABEL: @test_vst1_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1_s8(int8_t *a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: @test_vst1_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1_s16(int16_t *a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: @test_vst1_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
// CHECK:   ret void
void test_vst1_s32(int32_t *a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: @test_vst1_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK:   ret void
void test_vst1_s64(int64_t *a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: @test_vst1_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK:   store <4 x half> [[TMP3]], <4 x half>* [[TMP2]]
// CHECK:   ret void
void test_vst1_f16(float16_t *a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: @test_vst1_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
// CHECK:   ret void
void test_vst1_f32(float32_t *a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: @test_vst1_f64(
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
// CHECK:   ret void
void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}

// CHECK-LABEL: @test_vst1_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK:   ret void
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: @test_vst1_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK:   ret void
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}

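// The vst2q tests store a pair of vectors with 2-way interleaving via the
// llvm.aarch64.neon.st2 intrinsics. The two-vector struct argument is passed
// indirectly and copied with memcpy before the individual lanes are loaded.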
// CHECK-LABEL: @test_vst2q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}

// CHECK-LABEL: @test_vst2q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}

// CHECK-LABEL: @test_vst2q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}

// CHECK-LABEL: @test_vst2q_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
  vst2q_u64(a, b);
}

// CHECK-LABEL: @test_vst2q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
  vst2q_s8(a, b);
}

// CHECK-LABEL: @test_vst2q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
  vst2q_s16(a, b);
}

// CHECK-LABEL: @test_vst2q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
  vst2q_s32(a, b);
}

// CHECK-LABEL: @test_vst2q_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
  vst2q_s64(a, b);
}

// CHECK-LABEL: @test_vst2q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK:   call void @llvm.aarch64.neon.st2.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}

11158 // CHECK-LABEL: @test_vst2q_f32(
11159 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
11160 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
11161 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
11162 // CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
11163 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
11164 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
11165 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11166 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
11167 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11168 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
11169 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11170 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11171 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11172 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
11173 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11174 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11175 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11176 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11177 // CHECK:   call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
11178 // CHECK:   ret void
test_vst2q_f32(float32_t * a,float32x4x2_t b)11179 void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
11180   vst2q_f32(a, b);
11181 }
11182 
11183 // CHECK-LABEL: @test_vst2q_f64(
11184 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
11185 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
11186 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
11187 // CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
11188 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
11189 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
11190 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11191 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
11192 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11193 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
11194 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11195 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11196 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11197 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
11198 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11199 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11200 // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11201 // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11202 // CHECK:   call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
11203 // CHECK:   ret void
test_vst2q_f64(float64_t * a,float64x2x2_t b)11204 void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
11205   vst2q_f64(a, b);
11206 }
11207 
// CHECK-LABEL: @test_vst2q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}

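// The 8-bit variants are the simplest instances of the pattern: the lanes
// are already <16 x i8>, so no lane bitcasts are emitted, and %a is already
// an i8*, so it is passed to the intrinsic directly instead of through a
// cast.
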
// CHECK-LABEL: @test_vst2q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}

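// The 64-bit (d-register) vst2 forms below follow the same shape at half
// the size: the structs are 8-byte aligned, the shadow copy is 16 bytes,
// and the non-8-bit element types round-trip through <8 x i8> rather than
// <16 x i8>.
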
// CHECK-LABEL: @test_vst2_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}

// CHECK-LABEL: @test_vst2_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}

// CHECK-LABEL: @test_vst2_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}

// CHECK-LABEL: @test_vst2_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}

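// With <1 x i64> lanes the 2-way interleave degenerates to concatenation:
// "interleaving" two one-element vectors simply stores b.val[0] followed by
// b.val[1], so vst2_u64 writes 16 contiguous bytes starting at a.
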
// CHECK-LABEL: @test_vst2_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}

// CHECK-LABEL: @test_vst2_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}

// CHECK-LABEL: @test_vst2_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}

// CHECK-LABEL: @test_vst2_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}

// CHECK-LABEL: @test_vst2_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   call void @llvm.aarch64.neon.st2.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}

// CHECK-LABEL: @test_vst2_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}

// CHECK-LABEL: @test_vst2_f64(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}

// CHECK-LABEL: @test_vst2_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: @test_vst2_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

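// The vst3 tests below extend the same lowering to three lanes: the
// argument coerces to a [3 x <N x T>] array, the memcpy grows to 48 bytes
// for the q-forms, and @llvm.aarch64.neon.st3 performs a 3-way interleave.
// With hypothetical lanes {0,1}, {2,3} and {4,5}, vst3q would write
// {0,2,4,1,3,5}.
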
// CHECK-LABEL: @test_vst3q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}

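// A classic use of vst3q_u8 is writing interleaved RGB out of planar
// channels; a minimal sketch, assuming r, g, b and rgb are suitably sized
// hypothetical uint8_t buffers:
//
//   uint8x16x3_t px = { { vld1q_u8(r), vld1q_u8(g), vld1q_u8(b) } };
//   vst3q_u8(rgb, px);  // rgb[0..47] = R0,G0,B0,R1,G1,B1,...
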
// CHECK-LABEL: @test_vst3q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}

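// As with vst2, every lane that is not already a byte vector is bitcast to
// <16 x i8> and immediately back. Those round-trips appear to come from
// clang's generic NEON builtin lowering, which funnels operands through
// byte vectors, and since only mem2reg runs here they survive into the IR
// being matched.
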
// CHECK-LABEL: @test_vst3q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}

// CHECK-LABEL: @test_vst3q_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
  vst3q_u64(a, b);
}

// CHECK-LABEL: @test_vst3q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
  vst3q_s8(a, b);
}

// CHECK-LABEL: @test_vst3q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
  vst3q_s16(a, b);
}

// CHECK-LABEL: @test_vst3q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
  vst3q_s32(a, b);
}

// CHECK-LABEL: @test_vst3q_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
  vst3q_s64(a, b);
}

// CHECK-LABEL: @test_vst3q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
// CHECK:   call void @llvm.aarch64.neon.st3.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
  vst3q_f16(a, b);
}

// CHECK-LABEL: @test_vst3q_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

// CHECK-LABEL: @test_vst3q_f64(
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
  vst3q_f64(a, b);
}

// CHECK-LABEL: @test_vst3q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

// CHECK-LABEL: @test_vst3q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}

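// The 64-bit D-register vst3 variants below follow the same pattern on the
// half-width vector types: 8-byte alignment, a 24-byte coercing memcpy, and
// bitcasts through <8 x i8> instead of <16 x i8>.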
// CHECK-LABEL: @test_vst3_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

// CHECK-LABEL: @test_vst3_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

// CHECK-LABEL: @test_vst3_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

// CHECK-LABEL: @test_vst3_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

// CHECK-LABEL: @test_vst3_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}

// CHECK-LABEL: @test_vst3_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}

// CHECK-LABEL: @test_vst3_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}

// CHECK-LABEL: @test_vst3_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}

// CHECK-LABEL: @test_vst3_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK:   call void @llvm.aarch64.neon.st3.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}

// CHECK-LABEL: @test_vst3_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}

// CHECK-LABEL: @test_vst3_f64(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}

// CHECK-LABEL: @test_vst3_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

// CHECK-LABEL: @test_vst3_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}

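// vst4q stores four Q registers with 4-way interleaving via
// @llvm.aarch64.neon.st4; the x4 aggregate is 64 bytes, so the coercing
// memcpy grows to 64 and a fourth element ([[VAL5]]/[[ARRAYIDX6]]) is loaded.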
12301 // CHECK-LABEL: @test_vst4q_u8(
12302 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
12303 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
12304 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
12305 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12306 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
12307 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
12308 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12309 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12310 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12311 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12312 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12313 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12314 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12315 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12316 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12317 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12318 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
12319 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12320 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12321 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12322 // CHECK:   ret void
test_vst4q_u8(uint8_t * a,uint8x16x4_t b)12323 void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
12324   vst4q_u8(a, b);
12325 }
12326 
12327 // CHECK-LABEL: @test_vst4q_u16(
12328 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
12329 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
12330 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
12331 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12332 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
12333 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
12334 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12335 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12336 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12337 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12338 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12339 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12340 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12341 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12342 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12343 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12344 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12345 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12346 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12347 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12348 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
12349 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12350 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12351 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12352 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12353 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12354 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12355 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12356 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12357 // CHECK:   ret void
test_vst4q_u16(uint16_t * a,uint16x8x4_t b)12358 void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
12359   vst4q_u16(a, b);
12360 }
12361 
12362 // CHECK-LABEL: @test_vst4q_u32(
12363 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
12364 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
12365 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
12366 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12367 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
12368 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
12369 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12370 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12371 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12372 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12373 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12374 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12375 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12376 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12377 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12378 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12379 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12380 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12381 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12382 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12383 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12384 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12385 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12386 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12387 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12388 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12389 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12390 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12391 // CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12392 // CHECK:   ret void
test_vst4q_u32(uint32_t * a,uint32x4x4_t b)12393 void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
12394   vst4q_u32(a, b);
12395 }
12396 
12397 // CHECK-LABEL: @test_vst4q_u64(
12398 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
12399 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
12400 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
12401 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12402 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
12403 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
12404 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12405 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12406 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12407 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12408 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12409 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12410 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12411 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12412 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12413 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12414 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12415 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12416 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12417 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12418 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12419 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12420 // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12421 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12422 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12423 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12424 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12425 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12426 // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12427 // CHECK:   ret void
test_vst4q_u64(uint64_t * a,uint64x2x4_t b)12428 void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
12429   vst4q_u64(a, b);
12430 }
12431 
12432 // CHECK-LABEL: @test_vst4q_s8(
12433 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
12434 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
12435 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
12436 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12437 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
12438 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
12439 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12440 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12441 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12442 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12443 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12444 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12445 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12446 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12447 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12448 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12449 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12450 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12451 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12452 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12453 // CHECK:   ret void
test_vst4q_s8(int8_t * a,int8x16x4_t b)12454 void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
12455   vst4q_s8(a, b);
12456 }
12457 
12458 // CHECK-LABEL: @test_vst4q_s16(
12459 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
12460 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
12461 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
12462 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12463 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
12464 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
12465 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12466 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12467 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12468 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12469 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12470 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12471 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12472 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12473 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12474 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12475 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12476 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12477 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12478 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12479 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12480 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12481 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12482 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12483 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12484 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12485 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12486 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12487 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12488 // CHECK:   ret void
test_vst4q_s16(int16_t * a,int16x8x4_t b)12489 void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
12490   vst4q_s16(a, b);
12491 }
12492 
12493 // CHECK-LABEL: @test_vst4q_s32(
12494 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
12495 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
12496 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
12497 // CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12498 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
12499 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
12500 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12501 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12502 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12503 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12504 // CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12505 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12506 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12507 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12508 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12509 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12510 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12511 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12512 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12513 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12514 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12515 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12516 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12517 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12518 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12519 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12520 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12521 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12522 // CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12523 // CHECK:   ret void
test_vst4q_s32(int32_t * a,int32x4x4_t b)12524 void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
12525   vst4q_s32(a, b);
12526 }
12527 
12528 // CHECK-LABEL: @test_vst4q_s64(
12529 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
12530 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
12531 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
12532 // CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12533 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
12534 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
12535 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12536 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12537 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12538 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12539 // CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12540 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12541 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12542 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12543 // CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12544 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12545 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12546 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12547 // CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12548 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12549 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12550 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12551 // CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12552 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12553 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12554 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12555 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12556 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12557 // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12558 // CHECK:   ret void
12559 void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
12560   vst4q_s64(a, b);
12561 }
12562 
12563 // CHECK-LABEL: @test_vst4q_f16(
12564 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
12565 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
12566 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
12567 // CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
12568 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
12569 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
12570 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12571 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12572 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12573 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
12574 // CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
12575 // CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
12576 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12577 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
12578 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
12579 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
12580 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12581 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
12582 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
12583 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
12584 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12585 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
12586 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
12587 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
12588 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
12589 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
12590 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
12591 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
12592 // CHECK:   call void @llvm.aarch64.neon.st4.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i8* [[TMP2]])
12593 // CHECK:   ret void
12594 void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
12595   vst4q_f16(a, b);
12596 }
12597 
12598 // CHECK-LABEL: @test_vst4q_f32(
12599 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
12600 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
12601 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
12602 // CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
12603 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
12604 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
12605 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12606 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
12607 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12608 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
12609 // CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
12610 // CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
12611 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12612 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
12613 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12614 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12615 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12616 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
12617 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
12618 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
12619 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12620 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
12621 // CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
12622 // CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
12623 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12624 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12625 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
12626 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
12627 // CHECK:   call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
12628 // CHECK:   ret void
12629 void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
12630   vst4q_f32(a, b);
12631 }
12632 
12633 // CHECK-LABEL: @test_vst4q_f64(
12634 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
12635 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
12636 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
12637 // CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
12638 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
12639 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
12640 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12641 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12642 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12643 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
12644 // CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12645 // CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12646 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12647 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
12648 // CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12649 // CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12650 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12651 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
12652 // CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
12653 // CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12654 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12655 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
12656 // CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
12657 // CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
12658 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12659 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12660 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12661 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
12662 // CHECK:   call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
12663 // CHECK:   ret void
12664 void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
12665   vst4q_f64(a, b);
12666 }
12667 
12668 // CHECK-LABEL: @test_vst4q_p8(
12669 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
12670 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
12671 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
12672 // CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12673 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
12674 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
12675 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12676 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12677 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12678 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12679 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12680 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12681 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12682 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12683 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12684 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12685 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12686 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12687 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12688 // CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12689 // CHECK:   ret void
12690 void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
12691   vst4q_p8(a, b);
12692 }
12693 
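// For 8-bit element types (s8/u8/p8) the lane vectors are already <16 x i8>
// (or <8 x i8> for the D-register variants), so no bitcast round-trip is
// needed and the pointer %a is passed to the st4 intrinsic directly, without
// an i8* conversion.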
12694 // CHECK-LABEL: @test_vst4q_p16(
12695 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
12696 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
12697 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
12698 // CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12699 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
12700 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
12701 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12702 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12703 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12704 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12705 // CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12706 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12707 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12708 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12709 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12710 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12711 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12712 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12713 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12714 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12715 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12716 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12717 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12718 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12719 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12720 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12721 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12722 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12723 // CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12724 // CHECK:   ret void
12725 void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
12726   vst4q_p16(a, b);
12727 }
12728 
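// The non-q vst4_* variants below operate on 64-bit D registers: the
// aggregates are [4 x <N x T>] totalling 32 bytes at 8-byte alignment (hence
// the i64 32 memcpy), and non-i8 element types round-trip through <8 x i8>
// rather than <16 x i8>.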
12729 // CHECK-LABEL: @test_vst4_u8(
12730 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
12731 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
12732 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
12733 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12734 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
12735 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
12736 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12737 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12738 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12739 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12740 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12741 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12742 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12743 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12744 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12745 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12746 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12747 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12748 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12749 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12750 // CHECK:   ret void
12751 void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
12752   vst4_u8(a, b);
12753 }
12754 
12755 // CHECK-LABEL: @test_vst4_u16(
12756 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
12757 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
12758 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
12759 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12760 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
12761 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
12762 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12763 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12764 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12765 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12766 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12767 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12768 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12769 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12770 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12771 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12772 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12773 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12774 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12775 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12776 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12777 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12778 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12779 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12780 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12781 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12782 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12783 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12784 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12785 // CHECK:   ret void
12786 void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
12787   vst4_u16(a, b);
12788 }
12789 
12790 // CHECK-LABEL: @test_vst4_u32(
12791 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
12792 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
12793 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
12794 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12795 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
12796 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
12797 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12798 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12799 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12800 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12801 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12802 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12803 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12804 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12805 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12806 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12807 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12808 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12809 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12810 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12811 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12812 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12813 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12814 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12815 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12816 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12817 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12818 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12819 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12820 // CHECK:   ret void
12821 void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
12822   vst4_u32(a, b);
12823 }
12824 
12825 // CHECK-LABEL: @test_vst4_u64(
12826 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
12827 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
12828 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
12829 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12830 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
12831 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
12832 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12833 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12834 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12835 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12836 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12837 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12838 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12839 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12840 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12841 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12842 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12843 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12844 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12845 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12846 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12847 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12848 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12849 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12850 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12851 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12852 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12853 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12854 // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12855 // CHECK:   ret void
12856 void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
12857   vst4_u64(a, b);
12858 }
12859 
12860 // CHECK-LABEL: @test_vst4_s8(
12861 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
12862 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
12863 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
12864 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12865 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
12866 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
12867 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12868 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12869 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12870 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12871 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12872 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12873 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12874 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12875 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12876 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12877 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12878 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12879 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12880 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12881 // CHECK:   ret void
12882 void test_vst4_s8(int8_t *a, int8x8x4_t b) {
12883   vst4_s8(a, b);
12884 }
12885 
12886 // CHECK-LABEL: @test_vst4_s16(
12887 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
12888 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
12889 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
12890 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12891 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
12892 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
12893 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12894 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12895 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12896 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12897 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12898 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12899 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12900 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12901 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12902 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12903 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12904 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12905 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12906 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12907 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12908 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12909 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12910 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12911 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12912 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12913 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12914 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12915 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12916 // CHECK:   ret void
12917 void test_vst4_s16(int16_t *a, int16x4x4_t b) {
12918   vst4_s16(a, b);
12919 }
12920 
12921 // CHECK-LABEL: @test_vst4_s32(
12922 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
12923 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
12924 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
12925 // CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12926 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
12927 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
12928 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12929 // CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12930 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12931 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12932 // CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12933 // CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12934 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12935 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12936 // CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12937 // CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12938 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12939 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12940 // CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12941 // CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12942 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12943 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12944 // CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12945 // CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12946 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12947 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12948 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12949 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12950 // CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12951 // CHECK:   ret void
12952 void test_vst4_s32(int32_t *a, int32x2x4_t b) {
12953   vst4_s32(a, b);
12954 }
12955 
12956 // CHECK-LABEL: @test_vst4_s64(
12957 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
12958 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
12959 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
12960 // CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12961 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
12962 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
12963 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12964 // CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12965 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12966 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12967 // CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12968 // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12969 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12970 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12971 // CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12972 // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12973 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12974 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12975 // CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12976 // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12977 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12978 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12979 // CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12980 // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12981 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12982 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12983 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12984 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12985 // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12986 // CHECK:   ret void
12987 void test_vst4_s64(int64_t *a, int64x1x4_t b) {
12988   vst4_s64(a, b);
12989 }
12990 
12991 // CHECK-LABEL: @test_vst4_f16(
12992 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
12993 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
12994 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
12995 // CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
12996 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
12997 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
12998 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12999 // CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
13000 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13001 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
13002 // CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
13003 // CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
13004 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13005 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
13006 // CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
13007 // CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
13008 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13009 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
13010 // CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
13011 // CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
13012 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13013 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
13014 // CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
13015 // CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
13016 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
13017 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
13018 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
13019 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
13020 // CHECK:   call void @llvm.aarch64.neon.st4.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i8* [[TMP2]])
13021 // CHECK:   ret void
13022 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
13023   vst4_f16(a, b);
13024 }
13025 
13026 // CHECK-LABEL: @test_vst4_f32(
13027 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
13028 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
13029 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
13030 // CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
13031 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
13032 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
13033 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13034 // CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
13035 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13036 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
13037 // CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
13038 // CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
13039 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13040 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
13041 // CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
13042 // CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
13043 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13044 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
13045 // CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
13046 // CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
13047 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13048 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
13049 // CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
13050 // CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
13051 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
13052 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
13053 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
13054 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
13055 // CHECK:   call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
13056 // CHECK:   ret void
13057 void test_vst4_f32(float32_t *a, float32x2x4_t b) {
13058   vst4_f32(a, b);
13059 }
13060 
13061 // CHECK-LABEL: @test_vst4_f64(
13062 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
13063 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
13064 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
13065 // CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
13066 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
13067 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
13068 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13069 // CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13070 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13071 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
13072 // CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13073 // CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13074 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13075 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
13076 // CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13077 // CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13078 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13079 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
13080 // CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13081 // CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13082 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13083 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
13084 // CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
13085 // CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
13086 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13087 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13088 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13089 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
13090 // CHECK:   call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
13091 // CHECK:   ret void
13092 void test_vst4_f64(float64_t *a, float64x1x4_t b) {
13093   vst4_f64(a, b);
13094 }
13095 
13096 // CHECK-LABEL: @test_vst4_p8(
13097 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
13098 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
13099 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
13100 // CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13101 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
13102 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
13103 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13104 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13105 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13106 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13107 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13108 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13109 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13110 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13111 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13112 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13113 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13114 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13115 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13116 // CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13117 // CHECK:   ret void
13118 void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
13119   vst4_p8(a, b);
13120 }
13121 
13122 // CHECK-LABEL: @test_vst4_p16(
13123 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
13124 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
13125 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
13126 // CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13127 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
13128 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
13129 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13130 // CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13131 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13132 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13133 // CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13134 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13135 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13136 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13137 // CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13138 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13139 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13140 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13141 // CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13142 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13143 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13144 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13145 // CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13146 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13147 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13148 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13149 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13150 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13151 // CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13152 // CHECK:   ret void
13153 void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
13154   vst4_p16(a, b);
13155 }
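
// Not a checked test: a minimal usage sketch (hypothetical helper, not part
// of the original file) illustrating the interleaving contract the vst4
// tests above exercise -- lane i of source vector vN is stored to
// out[4*i + N], so four separate "planes" end up interleaved in memory.
void demo_vst4_s32_interleave(int32_t *out, int32x2_t v0, int32x2_t v1,
                              int32x2_t v2, int32x2_t v3) {
  int32x2x4_t v = {{v0, v1, v2, v3}};
  // out = { v0[0], v1[0], v2[0], v3[0], v0[1], v1[1], v2[1], v3[1] }
  vst4_s32(out, v);
}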
13156 
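// The vld1*_xN tests below cover the multi-register loads with no
// de-interleaving: each lowers to a single @llvm.aarch64.neon.ld1xN.* call
// returning a literal struct of N vectors, which is stored into the __ret
// temporary and then copied into the returned aggregate.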
13157 // CHECK-LABEL: @test_vld1q_f64_x2(
13158 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
13159 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
13160 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13161 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13162 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13163 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
13164 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
13165 // CHECK:   store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
13166 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
13167 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13168 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13169 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
13170 // CHECK:   ret %struct.float64x2x2_t [[TMP6]]
13171 float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
13172   return vld1q_f64_x2(a);
13173 }
13174 
13175 // CHECK-LABEL: @test_vld1q_p64_x2(
13176 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
13177 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
13178 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13179 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13180 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13181 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
13182 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
13183 // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
13184 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
13185 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13186 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13187 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
13188 // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
13189 poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
13190   return vld1q_p64_x2(a);
13191 }
13192 
13193 // CHECK-LABEL: @test_vld1_f64_x2(
13194 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
13195 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
13196 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13197 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13198 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13199 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
13200 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
13201 // CHECK:   store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
13202 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
13203 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13204 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13205 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
13206 // CHECK:   ret %struct.float64x1x2_t [[TMP6]]
13207 float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
13208   return vld1_f64_x2(a);
13209 }
13210 
13211 // CHECK-LABEL: @test_vld1_p64_x2(
13212 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
13213 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
13214 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13215 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13216 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13217 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
13218 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
13219 // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
13220 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
13221 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13222 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13223 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
13224 // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
test_vld1_p64_x2(poly64_t const * a)13225 poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
13226   return vld1_p64_x2(a);
13227 }
13228 
13229 // CHECK-LABEL: @test_vld1q_f64_x3(
13230 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
13231 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
13232 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13233 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13234 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13235 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
13236 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
13237 // CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13238 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
13239 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13240 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13241 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
13242 // CHECK:   ret %struct.float64x2x3_t [[TMP6]]
test_vld1q_f64_x3(float64_t const * a)13243 float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
13244   return vld1q_f64_x3(a);
13245 }
13246 
13247 // CHECK-LABEL: @test_vld1q_p64_x3(
13248 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
13249 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
13250 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13251 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13252 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13253 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
13254 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
13255 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13256 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
13257 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13258 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13259 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
13260 // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
test_vld1q_p64_x3(poly64_t const * a)13261 poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
13262   return vld1q_p64_x3(a);
13263 }
13264 
13265 // CHECK-LABEL: @test_vld1_f64_x3(
13266 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
13267 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
13268 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13269 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13270 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13271 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
13272 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
13273 // CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13274 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
13275 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13276 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13277 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
13278 // CHECK:   ret %struct.float64x1x3_t [[TMP6]]
test_vld1_f64_x3(float64_t const * a)13279 float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
13280   return vld1_f64_x3(a);
13281 }
13282 
13283 // CHECK-LABEL: @test_vld1_p64_x3(
13284 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
13285 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
13286 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13287 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13288 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13289 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
13290 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
13291 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13292 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
13293 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13294 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13295 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
13296 // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
test_vld1_p64_x3(poly64_t const * a)13297 poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
13298   return vld1_p64_x3(a);
13299 }
13300 
13301 // CHECK-LABEL: @test_vld1q_f64_x4(
13302 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
13303 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
13304 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13305 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13306 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13307 // CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
13308 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
13309 // CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13310 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
13311 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13312 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13313 // CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
13314 // CHECK:   ret %struct.float64x2x4_t [[TMP6]]
test_vld1q_f64_x4(float64_t const * a)13315 float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
13316   return vld1q_f64_x4(a);
13317 }
13318 
13319 // CHECK-LABEL: @test_vld1q_p64_x4(
13320 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
13321 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
13322 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13323 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13324 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13325 // CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
13326 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
13327 // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13328 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
13329 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13330 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13331 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
13332 // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
test_vld1q_p64_x4(poly64_t const * a)13333 poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
13334   return vld1q_p64_x4(a);
13335 }
13336 
13337 // CHECK-LABEL: @test_vld1_f64_x4(
13338 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
13339 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
13340 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13341 // CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
13342 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13343 // CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
13344 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
13345 // CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13346 // CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
13347 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13348 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13349 // CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
13350 // CHECK:   ret %struct.float64x1x4_t [[TMP6]]
test_vld1_f64_x4(float64_t const * a)13351 float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
13352   return vld1_f64_x4(a);
13353 }
13354 
13355 // CHECK-LABEL: @test_vld1_p64_x4(
13356 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
13357 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
13358 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13359 // CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
13360 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13361 // CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
13362 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
13363 // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13364 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
13365 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13366 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13367 // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
13368 // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
test_vld1_p64_x4(poly64_t const * a)13369 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
13370   return vld1_p64_x4(a);
13371 }
13372 
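// The vst1*_xN store tests mirror the loads above: the aggregate argument is
// spilled to a stack slot, each of the N vectors is reloaded from it, and the
// values are passed to the matching llvm.aarch64.neon.st1xN intrinsic.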
// CHECK-LABEL: @test_vst1q_f64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
// CHECK:   ret void
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}

// CHECK-LABEL: @test_vst1q_p64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}

// CHECK-LABEL: @test_vst1_f64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
// CHECK:   ret void
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}

// CHECK-LABEL: @test_vst1_p64_x2(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK:   ret void
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}

// CHECK-LABEL: @test_vst1q_f64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
// CHECK:   ret void
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}

// CHECK-LABEL: @test_vst1q_p64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}

// CHECK-LABEL: @test_vst1_f64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
// CHECK:   ret void
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}

// CHECK-LABEL: @test_vst1_p64_x3(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK:   ret void
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}

// CHECK-LABEL: @test_vst1q_f64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
// CHECK:   ret void
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
  vst1q_f64_x4(a, b);
}

// CHECK-LABEL: @test_vst1q_p64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}

// CHECK-LABEL: @test_vst1_f64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
// CHECK:   ret void
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}

// CHECK-LABEL: @test_vst1_p64_x4(
// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}

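// Scalar comparison intrinsics (vceqd, vcged, vcgtd, vcled, vcltd, their
// unsigned and compare-against-zero variants) lower to a plain icmp whose i1
// result is sign-extended to an all-ones or all-zeros i64 mask; vtstd first
// ANDs the operands and compares the result against zero.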
// CHECK-LABEL: @test_vceqd_s64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}

// CHECK-LABEL: @test_vceqd_u64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (int64_t)vceqd_u64(a, b);
}

// CHECK-LABEL: @test_vceqzd_s64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZ_I]]
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}

// CHECK-LABEL: @test_vceqzd_u64(
// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK:   [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZD_I]]
int64_t test_vceqzd_u64(int64_t a) {
  return (int64_t)vceqzd_u64(a);
}

// CHECK-LABEL: @test_vcged_s64(
// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}

// CHECK-LABEL: @test_vcged_u64(
// CHECK:   [[TMP0:%.*]] = icmp uge i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}

// CHECK-LABEL: @test_vcgezd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, 0
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGEZ_I]]
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}

// CHECK-LABEL: @test_vcgtd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}

// CHECK-LABEL: @test_vcgtd_u64(
// CHECK:   [[TMP0:%.*]] = icmp ugt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}

// CHECK-LABEL: @test_vcgtzd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, 0
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGTZ_I]]
int64_t test_vcgtzd_s64(int64_t a) {
  return (int64_t)vcgtzd_s64(a);
}

// CHECK-LABEL: @test_vcled_s64(
// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}

// CHECK-LABEL: @test_vcled_u64(
// CHECK:   [[TMP0:%.*]] = icmp ule i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}

// CHECK-LABEL: @test_vclezd_s64(
// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, 0
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLEZ_I]]
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}

// CHECK-LABEL: @test_vcltd_s64(
// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}

// CHECK-LABEL: @test_vcltd_u64(
// CHECK:   [[TMP0:%.*]] = icmp ult i64 %a, %b
// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQD_I]]
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}

// CHECK-LABEL: @test_vcltzd_s64(
// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, 0
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLTZ_I]]
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}

// CHECK-LABEL: @test_vtstd_s64(
// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK:   ret i64 [[VTSTD_I]]
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}

// CHECK-LABEL: @test_vtstd_u64(
// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK:   ret i64 [[VTSTD_I]]
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}

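// Scalar absolute value and negation: vabsd and the saturating vqabs*/vqneg*
// forms call the corresponding AArch64 intrinsics, while vnegd is a plain
// integer subtraction from zero. The 8- and 16-bit saturating variants are
// widened into lane 0 of a vector (insertelement/extractelement around a
// vector intrinsic) rather than using a scalar intrinsic.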
// CHECK-LABEL: @test_vabsd_s64(
// CHECK:   [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
// CHECK:   ret i64 [[VABSD_S64_I]]
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}

// CHECK-LABEL: @test_vqabsb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}

// CHECK-LABEL: @test_vqabsh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}

// CHECK-LABEL: @test_vqabss_s32(
// CHECK:   [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
// CHECK:   ret i32 [[VQABSS_S32_I]]
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}

// CHECK-LABEL: @test_vqabsd_s64(
// CHECK:   [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
// CHECK:   ret i64 [[VQABSD_S64_I]]
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}

// CHECK-LABEL: @test_vnegd_s64(
// CHECK:   [[VNEGD_I:%.*]] = sub i64 0, %a
// CHECK:   ret i64 [[VNEGD_I]]
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}

// CHECK-LABEL: @test_vqnegb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}

// CHECK-LABEL: @test_vqnegh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}

// CHECK-LABEL: @test_vqnegs_s32(
// CHECK:   [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
// CHECK:   ret i32 [[VQNEGS_S32_I]]
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}

// CHECK-LABEL: @test_vqnegd_s64(
// CHECK:   [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
// CHECK:   ret i64 [[VQNEGD_S64_I]]
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}

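// Mixed-sign saturating adds: vuqadd* (unsigned addend, signed accumulator)
// maps to llvm.aarch64.neon.suqadd and vsqadd* (signed addend, unsigned
// accumulator) maps to usqadd, with the same vector-lane round-trip for the
// 8- and 16-bit forms.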
// CHECK-LABEL: @test_vuqaddb_s8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddh_s16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}

// CHECK-LABEL: @test_vuqadds_s32(
// CHECK:   [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VUQADDS_S32_I]]
int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddd_s64(
// CHECK:   [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VUQADDD_S64_I]]
int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}

// CHECK-LABEL: @test_vsqaddb_u8(
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddh_u16(
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}

// CHECK-LABEL: @test_vsqadds_u32(
// CHECK:   [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
// CHECK:   ret i32 [[VSQADDS_U32_I]]
uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddd_u64(
// CHECK:   [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
// CHECK:   ret i64 [[VSQADDD_U64_I]]
uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}

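// Saturating doubling multiply-accumulate: the halfword forms build a
// sqdmull vector and feed lane 0 into a scalar sqadd (vqdmlal) or sqsub
// (vqdmlsl), while the word forms use sqdmulls.scalar directly.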
14037 // CHECK-LABEL: @test_vqdmlalh_s16(
14038 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
14039 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
14040 // CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
14041 // CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
14042 // CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
14043 // CHECK:   ret i32 [[VQDMLXL1_I]]
test_vqdmlalh_s16(int32_t a,int16_t b,int16_t c)14044 int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
14045   return (int32_t)vqdmlalh_s16(a, b, c);
14046 }
14047 
14048 // CHECK-LABEL: @test_vqdmlals_s32(
14049 // CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
14050 // CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
14051 // CHECK:   ret i64 [[VQDMLXL1_I]]
test_vqdmlals_s32(int64_t a,int32_t b,int32_t c)14052 int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
14053   return (int64_t)vqdmlals_s32(a, b, c);
14054 }
14055 
14056 // CHECK-LABEL: @test_vqdmlslh_s16(
14057 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
14058 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
14059 // CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
14060 // CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
14061 // CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
14062 // CHECK:   ret i32 [[VQDMLXL1_I]]
14063 int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
14064   return (int32_t)vqdmlslh_s16(a, b, c);
14065 }
14066 
14067 // CHECK-LABEL: @test_vqdmlsls_s32(
14068 // CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
14069 // CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
14070 // CHECK:   ret i64 [[VQDMLXL1_I]]
14071 int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
14072   return (int64_t)vqdmlsls_s32(a, b, c);
14073 }
14074 
14075 // CHECK-LABEL: @test_vqdmullh_s16(
14076 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
14077 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
14078 // CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
14079 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
14080 // CHECK:   ret i32 [[TMP2]]
14081 int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
14082   return (int32_t)vqdmullh_s16(a, b);
14083 }
14084 
14085 // CHECK-LABEL: @test_vqdmulls_s32(
14086 // CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
14087 // CHECK:   ret i64 [[VQDMULLS_S32_I]]
14088 int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
14089   return (int64_t)vqdmulls_s32(a, b);
14090 }
14091 
14092 // CHECK-LABEL: @test_vqmovunh_s16(
14093 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14094 // CHECK:   [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
14095 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
14096 // CHECK:   ret i8 [[TMP1]]
14097 int8_t test_vqmovunh_s16(int16_t a) {
14098   return (int8_t)vqmovunh_s16(a);
14099 }
14100 
14101 // CHECK-LABEL: @test_vqmovuns_s32(
14102 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14103 // CHECK:   [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
14104 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
14105 // CHECK:   ret i16 [[TMP1]]
14106 int16_t test_vqmovuns_s32(int32_t a) {
14107   return (int16_t)vqmovuns_s32(a);
14108 }
14109 
14110 // CHECK-LABEL: @test_vqmovund_s64(
14111 // CHECK:   [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
14112 // CHECK:   ret i32 [[VQMOVUND_S64_I]]
14113 int32_t test_vqmovund_s64(int64_t a) {
14114   return (int32_t)vqmovund_s64(a);
14115 }
14116 
14117 // CHECK-LABEL: @test_vqmovnh_s16(
14118 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14119 // CHECK:   [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
14120 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
14121 // CHECK:   ret i8 [[TMP1]]
14122 int8_t test_vqmovnh_s16(int16_t a) {
14123   return (int8_t)vqmovnh_s16(a);
14124 }
14125 
14126 // CHECK-LABEL: @test_vqmovns_s32(
14127 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14128 // CHECK:   [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
14129 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
14130 // CHECK:   ret i16 [[TMP1]]
14131 int16_t test_vqmovns_s32(int32_t a) {
14132   return (int16_t)vqmovns_s32(a);
14133 }
14134 
14135 // CHECK-LABEL: @test_vqmovnd_s64(
14136 // CHECK:   [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
14137 // CHECK:   ret i32 [[VQMOVND_S64_I]]
14138 int32_t test_vqmovnd_s64(int64_t a) {
14139   return (int32_t)vqmovnd_s64(a);
14140 }
14141 
14142 // CHECK-LABEL: @test_vqmovnh_u16(
14143 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14144 // CHECK:   [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
14145 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
14146 // CHECK:   ret i8 [[TMP1]]
14147 int8_t test_vqmovnh_u16(int16_t a) {
14148   return (int8_t)vqmovnh_u16(a);
14149 }
14150 
14151 // CHECK-LABEL: @test_vqmovns_u32(
14152 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14153 // CHECK:   [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
14154 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
14155 // CHECK:   ret i16 [[TMP1]]
14156 int16_t test_vqmovns_u32(int32_t a) {
14157   return (int16_t)vqmovns_u32(a);
14158 }
14159 
14160 // CHECK-LABEL: @test_vqmovnd_u64(
14161 // CHECK:   [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
14162 // CHECK:   ret i32 [[VQMOVND_U64_I]]
14163 int32_t test_vqmovnd_u64(int64_t a) {
14164   return (int32_t)vqmovnd_u64(a);
14165 }
14166 
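// Scalar floating-point compares lower to an ordered fcmp whose i1 result is sign-extended to an all-ones/all-zeros integer mask.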
14167 // CHECK-LABEL: @test_vceqs_f32(
14168 // CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, %b
14169 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14170 // CHECK:   ret i32 [[VCMPD_I]]
14171 uint32_t test_vceqs_f32(float32_t a, float32_t b) {
14172   return (uint32_t)vceqs_f32(a, b);
14173 }
14174 
14175 // CHECK-LABEL: @test_vceqd_f64(
14176 // CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, %b
14177 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14178 // CHECK:   ret i64 [[VCMPD_I]]
14179 uint64_t test_vceqd_f64(float64_t a, float64_t b) {
14180   return (uint64_t)vceqd_f64(a, b);
14181 }
14182 
14183 // CHECK-LABEL: @test_vceqzs_f32(
14184 // CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
14185 // CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
14186 // CHECK:   ret i32 [[VCEQZ_I]]
14187 uint32_t test_vceqzs_f32(float32_t a) {
14188   return (uint32_t)vceqzs_f32(a);
14189 }
14190 
14191 // CHECK-LABEL: @test_vceqzd_f64(
14192 // CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
14193 // CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
14194 // CHECK:   ret i64 [[VCEQZ_I]]
14195 uint64_t test_vceqzd_f64(float64_t a) {
14196   return (uint64_t)vceqzd_f64(a);
14197 }
14198 
14199 // CHECK-LABEL: @test_vcges_f32(
14200 // CHECK:   [[TMP0:%.*]] = fcmp oge float %a, %b
14201 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14202 // CHECK:   ret i32 [[VCMPD_I]]
14203 uint32_t test_vcges_f32(float32_t a, float32_t b) {
14204   return (uint32_t)vcges_f32(a, b);
14205 }
14206 
14207 // CHECK-LABEL: @test_vcged_f64(
14208 // CHECK:   [[TMP0:%.*]] = fcmp oge double %a, %b
14209 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14210 // CHECK:   ret i64 [[VCMPD_I]]
14211 uint64_t test_vcged_f64(float64_t a, float64_t b) {
14212   return (uint64_t)vcged_f64(a, b);
14213 }
14214 
14215 // CHECK-LABEL: @test_vcgezs_f32(
14216 // CHECK:   [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
14217 // CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
14218 // CHECK:   ret i32 [[VCGEZ_I]]
14219 uint32_t test_vcgezs_f32(float32_t a) {
14220   return (uint32_t)vcgezs_f32(a);
14221 }
14222 
14223 // CHECK-LABEL: @test_vcgezd_f64(
14224 // CHECK:   [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
14225 // CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
14226 // CHECK:   ret i64 [[VCGEZ_I]]
14227 uint64_t test_vcgezd_f64(float64_t a) {
14228   return (uint64_t)vcgezd_f64(a);
14229 }
14230 
14231 // CHECK-LABEL: @test_vcgts_f32(
14232 // CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, %b
14233 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14234 // CHECK:   ret i32 [[VCMPD_I]]
14235 uint32_t test_vcgts_f32(float32_t a, float32_t b) {
14236   return (uint32_t)vcgts_f32(a, b);
14237 }
14238 
14239 // CHECK-LABEL: @test_vcgtd_f64(
14240 // CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, %b
14241 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14242 // CHECK:   ret i64 [[VCMPD_I]]
14243 uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
14244   return (uint64_t)vcgtd_f64(a, b);
14245 }
14246 
14247 // CHECK-LABEL: @test_vcgtzs_f32(
14248 // CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
14249 // CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
14250 // CHECK:   ret i32 [[VCGTZ_I]]
14251 uint32_t test_vcgtzs_f32(float32_t a) {
14252   return (uint32_t)vcgtzs_f32(a);
14253 }
14254 
14255 // CHECK-LABEL: @test_vcgtzd_f64(
14256 // CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
14257 // CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
14258 // CHECK:   ret i64 [[VCGTZ_I]]
14259 uint64_t test_vcgtzd_f64(float64_t a) {
14260   return (uint64_t)vcgtzd_f64(a);
14261 }
14262 
14263 // CHECK-LABEL: @test_vcles_f32(
14264 // CHECK:   [[TMP0:%.*]] = fcmp ole float %a, %b
14265 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14266 // CHECK:   ret i32 [[VCMPD_I]]
14267 uint32_t test_vcles_f32(float32_t a, float32_t b) {
14268   return (uint32_t)vcles_f32(a, b);
14269 }
14270 
14271 // CHECK-LABEL: @test_vcled_f64(
14272 // CHECK:   [[TMP0:%.*]] = fcmp ole double %a, %b
14273 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14274 // CHECK:   ret i64 [[VCMPD_I]]
14275 uint64_t test_vcled_f64(float64_t a, float64_t b) {
14276   return (uint64_t)vcled_f64(a, b);
14277 }
14278 
14279 // CHECK-LABEL: @test_vclezs_f32(
14280 // CHECK:   [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
14281 // CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
14282 // CHECK:   ret i32 [[VCLEZ_I]]
14283 uint32_t test_vclezs_f32(float32_t a) {
14284   return (uint32_t)vclezs_f32(a);
14285 }
14286 
14287 // CHECK-LABEL: @test_vclezd_f64(
14288 // CHECK:   [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
14289 // CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
14290 // CHECK:   ret i64 [[VCLEZ_I]]
14291 uint64_t test_vclezd_f64(float64_t a) {
14292   return (uint64_t)vclezd_f64(a);
14293 }
14294 
14295 // CHECK-LABEL: @test_vclts_f32(
14296 // CHECK:   [[TMP0:%.*]] = fcmp olt float %a, %b
14297 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14298 // CHECK:   ret i32 [[VCMPD_I]]
14299 uint32_t test_vclts_f32(float32_t a, float32_t b) {
14300   return (uint32_t)vclts_f32(a, b);
14301 }
14302 
14303 // CHECK-LABEL: @test_vcltd_f64(
14304 // CHECK:   [[TMP0:%.*]] = fcmp olt double %a, %b
14305 // CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14306 // CHECK:   ret i64 [[VCMPD_I]]
14307 uint64_t test_vcltd_f64(float64_t a, float64_t b) {
14308   return (uint64_t)vcltd_f64(a, b);
14309 }
14310 
14311 // CHECK-LABEL: @test_vcltzs_f32(
14312 // CHECK:   [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
14313 // CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
14314 // CHECK:   ret i32 [[VCLTZ_I]]
14315 uint32_t test_vcltzs_f32(float32_t a) {
14316   return (uint32_t)vcltzs_f32(a);
14317 }
14318 
14319 // CHECK-LABEL: @test_vcltzd_f64(
14320 // CHECK:   [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
14321 // CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
14322 // CHECK:   ret i64 [[VCLTZ_I]]
14323 uint64_t test_vcltzd_f64(float64_t a) {
14324   return (uint64_t)vcltzd_f64(a);
14325 }
14326 
14327 // CHECK-LABEL: @test_vcages_f32(
14328 // CHECK:   [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
14329 // CHECK:   ret i32 [[VCAGES_F32_I]]
14330 uint32_t test_vcages_f32(float32_t a, float32_t b) {
14331   return (uint32_t)vcages_f32(a, b);
14332 }
14333 
14334 // CHECK-LABEL: @test_vcaged_f64(
14335 // CHECK:   [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
14336 // CHECK:   ret i64 [[VCAGED_F64_I]]
14337 uint64_t test_vcaged_f64(float64_t a, float64_t b) {
14338   return (uint64_t)vcaged_f64(a, b);
14339 }
14340 
14341 // CHECK-LABEL: @test_vcagts_f32(
14342 // CHECK:   [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
14343 // CHECK:   ret i32 [[VCAGTS_F32_I]]
14344 uint32_t test_vcagts_f32(float32_t a, float32_t b) {
14345   return (uint32_t)vcagts_f32(a, b);
14346 }
14347 
14348 // CHECK-LABEL: @test_vcagtd_f64(
14349 // CHECK:   [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
14350 // CHECK:   ret i64 [[VCAGTD_F64_I]]
14351 uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
14352   return (uint64_t)vcagtd_f64(a, b);
14353 }
14354 
14355 // CHECK-LABEL: @test_vcales_f32(
14356 // CHECK:   [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
14357 // CHECK:   ret i32 [[VCALES_F32_I]]
14358 uint32_t test_vcales_f32(float32_t a, float32_t b) {
14359   return (uint32_t)vcales_f32(a, b);
14360 }
14361 
14362 // CHECK-LABEL: @test_vcaled_f64(
14363 // CHECK:   [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
14364 // CHECK:   ret i64 [[VCALED_F64_I]]
14365 uint64_t test_vcaled_f64(float64_t a, float64_t b) {
14366   return (uint64_t)vcaled_f64(a, b);
14367 }
14368 
14369 // CHECK-LABEL: @test_vcalts_f32(
14370 // CHECK:   [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
14371 // CHECK:   ret i32 [[VCALTS_F32_I]]
14372 uint32_t test_vcalts_f32(float32_t a, float32_t b) {
14373   return (uint32_t)vcalts_f32(a, b);
14374 }
14375 
14376 // CHECK-LABEL: @test_vcaltd_f64(
14377 // CHECK:   [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
14378 // CHECK:   ret i64 [[VCALTD_F64_I]]
14379 uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
14380   return (uint64_t)vcaltd_f64(a, b);
14381 }
14382 
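// Immediate shifts on 64-bit scalars and <1 x i64> vectors; an unsigned shift right by 64 constant-folds to zero.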
14383 // CHECK-LABEL: @test_vshrd_n_s64(
14384 // CHECK:   [[SHRD_N:%.*]] = ashr i64 %a, 1
14385 // CHECK:   ret i64 [[SHRD_N]]
14386 int64_t test_vshrd_n_s64(int64_t a) {
14387   return (int64_t)vshrd_n_s64(a, 1);
14388 }
14389 
14390 // CHECK-LABEL: @test_vshr_n_s64(
14391 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14392 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14393 // CHECK:   [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
14394 // CHECK:   ret <1 x i64> [[VSHR_N]]
14395 int64x1_t test_vshr_n_s64(int64x1_t a) {
14396   return vshr_n_s64(a, 1);
14397 }
14398 
14399 // CHECK-LABEL: @test_vshrd_n_u64(
14400 // CHECK:   ret i64 0
14401 uint64_t test_vshrd_n_u64(uint64_t a) {
14402   return (uint64_t)vshrd_n_u64(a, 64);
14403 }
14404 
14405 // CHECK-LABEL: @test_vshrd_n_u64_2(
14406 // CHECK:   ret i64 0
14407 uint64_t test_vshrd_n_u64_2() {
14408   uint64_t a = UINT64_C(0xf000000000000000);
14409   return vshrd_n_u64(a, 64);
14410 }
14411 
14412 // CHECK-LABEL: @test_vshr_n_u64(
14413 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14414 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14415 // CHECK:   [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
14416 // CHECK:   ret <1 x i64> [[VSHR_N]]
14417 uint64x1_t test_vshr_n_u64(uint64x1_t a) {
14418   return vshr_n_u64(a, 1);
14419 }
14420 
14421 // CHECK-LABEL: @test_vrshrd_n_s64(
14422 // CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
14423 // CHECK:   ret i64 [[VRSHR_N]]
14424 int64_t test_vrshrd_n_s64(int64_t a) {
14425   return (int64_t)vrshrd_n_s64(a, 63);
14426 }
14427 
14428 // CHECK-LABEL: @test_vrshr_n_s64(
14429 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14430 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14431 // CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14432 // CHECK:   ret <1 x i64> [[VRSHR_N1]]
14433 int64x1_t test_vrshr_n_s64(int64x1_t a) {
14434   return vrshr_n_s64(a, 1);
14435 }
14436 
14437 // CHECK-LABEL: @test_vrshrd_n_u64(
14438 // CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
14439 // CHECK:   ret i64 [[VRSHR_N]]
14440 uint64_t test_vrshrd_n_u64(uint64_t a) {
14441   return (uint64_t)vrshrd_n_u64(a, 63);
14442 }
14443 
14444 // CHECK-LABEL: @test_vrshr_n_u64(
14445 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14446 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14447 // CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14448 // CHECK:   ret <1 x i64> [[VRSHR_N1]]
14449 uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
14450   return vrshr_n_u64(a, 1);
14451 }
14452 
14453 // CHECK-LABEL: @test_vsrad_n_s64(
14454 // CHECK:   [[SHRD_N:%.*]] = ashr i64 %b, 63
14455 // CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
14456 // CHECK:   ret i64 [[TMP0]]
14457 int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
14458   return (int64_t)vsrad_n_s64(a, b, 63);
14459 }
14460 
14461 // CHECK-LABEL: @test_vsra_n_s64(
14462 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14463 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14464 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14465 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14466 // CHECK:   [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
14467 // CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
14468 // CHECK:   ret <1 x i64> [[TMP4]]
14469 int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
14470   return vsra_n_s64(a, b, 1);
14471 }
14472 
14473 // CHECK-LABEL: @test_vsrad_n_u64(
14474 // CHECK:   [[SHRD_N:%.*]] = lshr i64 %b, 63
14475 // CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
14476 // CHECK:   ret i64 [[TMP0]]
14477 uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
14478   return (uint64_t)vsrad_n_u64(a, b, 63);
14479 }
14480 
14481 // CHECK-LABEL: @test_vsrad_n_u64_2(
14482 // CHECK:   ret i64 %a
14483 uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
14484   return (uint64_t)vsrad_n_u64(a, b, 64);
14485 }
14486 
14487 // CHECK-LABEL: @test_vsra_n_u64(
14488 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14489 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14490 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14491 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14492 // CHECK:   [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
14493 // CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
14494 // CHECK:   ret <1 x i64> [[TMP4]]
14495 uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
14496   return vsra_n_u64(a, b, 1);
14497 }
14498 
14499 // CHECK-LABEL: @test_vrsrad_n_s64(
14500 // CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
14501 // CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
14502 // CHECK:   ret i64 [[TMP1]]
14503 int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
14504   return (int64_t)vrsrad_n_s64(a, b, 63);
14505 }
14506 
14507 // CHECK-LABEL: @test_vrsra_n_s64(
14508 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14509 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14510 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14511 // CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14512 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14513 // CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
14514 // CHECK:   ret <1 x i64> [[TMP3]]
14515 int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
14516   return vrsra_n_s64(a, b, 1);
14517 }
14518 
14519 // CHECK-LABEL: @test_vrsrad_n_u64(
14520 // CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
14521 // CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
14522 // CHECK:   ret i64 [[TMP1]]
14523 uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
14524   return (uint64_t)vrsrad_n_u64(a, b, 63);
14525 }
14526 
14527 // CHECK-LABEL: @test_vrsra_n_u64(
14528 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14529 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14530 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14531 // CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14532 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14533 // CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
14534 // CHECK:   ret <1 x i64> [[TMP3]]
14535 uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
14536   return vrsra_n_u64(a, b, 1);
14537 }
14538 
14539 // CHECK-LABEL: @test_vshld_n_s64(
14540 // CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 1
14541 // CHECK:   ret i64 [[SHLD_N]]
14542 int64_t test_vshld_n_s64(int64_t a) {
14543   return (int64_t)vshld_n_s64(a, 1);
14544 }
14545 
14546 // CHECK-LABEL: @test_vshl_n_s64(
14547 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14548 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14549 // CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
14550 // CHECK:   ret <1 x i64> [[VSHL_N]]
14551 int64x1_t test_vshl_n_s64(int64x1_t a) {
14552   return vshl_n_s64(a, 1);
14553 }
14554 
14555 // CHECK-LABEL: @test_vshld_n_u64(
14556 // CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 63
14557 // CHECK:   ret i64 [[SHLD_N]]
14558 uint64_t test_vshld_n_u64(uint64_t a) {
14559   return (uint64_t)vshld_n_u64(a, 63);
14560 }
14561 
14562 // CHECK-LABEL: @test_vshl_n_u64(
14563 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14564 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14565 // CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
14566 // CHECK:   ret <1 x i64> [[VSHL_N]]
14567 uint64x1_t test_vshl_n_u64(uint64x1_t a) {
14568   return vshl_n_u64(a, 1);
14569 }
14570 
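// Saturating shift left by immediate: sub-word scalar forms widen to a vector, call sqshl/uqshl, and extract lane 0.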
14571 // CHECK-LABEL: @test_vqshlb_n_s8(
14572 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
14573 // CHECK:   [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
14574 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
14575 // CHECK:   ret i8 [[TMP1]]
14576 int8_t test_vqshlb_n_s8(int8_t a) {
14577   return (int8_t)vqshlb_n_s8(a, 7);
14578 }
14579 
14580 // CHECK-LABEL: @test_vqshlh_n_s16(
14581 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
14582 // CHECK:   [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
14583 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
14584 // CHECK:   ret i16 [[TMP1]]
14585 int16_t test_vqshlh_n_s16(int16_t a) {
14586   return (int16_t)vqshlh_n_s16(a, 15);
14587 }
14588 
14589 // CHECK-LABEL: @test_vqshls_n_s32(
14590 // CHECK:   [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
14591 // CHECK:   ret i32 [[VQSHLS_N_S32]]
14592 int32_t test_vqshls_n_s32(int32_t a) {
14593   return (int32_t)vqshls_n_s32(a, 31);
14594 }
14595 
14596 // CHECK-LABEL: @test_vqshld_n_s64(
14597 // CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
14598 // CHECK:   ret i64 [[VQSHL_N]]
14599 int64_t test_vqshld_n_s64(int64_t a) {
14600   return (int64_t)vqshld_n_s64(a, 63);
14601 }
14602 
14603 // CHECK-LABEL: @test_vqshl_n_s8(
14604 // CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
14605 // CHECK:   ret <8 x i8> [[VQSHL_N]]
14606 int8x8_t test_vqshl_n_s8(int8x8_t a) {
14607   return vqshl_n_s8(a, 0);
14608 }
14609 
14610 // CHECK-LABEL: @test_vqshlq_n_s8(
14611 // CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
14612 // CHECK:   ret <16 x i8> [[VQSHL_N]]
14613 int8x16_t test_vqshlq_n_s8(int8x16_t a) {
14614   return vqshlq_n_s8(a, 0);
14615 }
14616 
14617 // CHECK-LABEL: @test_vqshl_n_s16(
14618 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14619 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14620 // CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
14621 // CHECK:   ret <4 x i16> [[VQSHL_N1]]
14622 int16x4_t test_vqshl_n_s16(int16x4_t a) {
14623   return vqshl_n_s16(a, 0);
14624 }
14625 
14626 // CHECK-LABEL: @test_vqshlq_n_s16(
14627 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14628 // CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14629 // CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
14630 // CHECK:   ret <8 x i16> [[VQSHL_N1]]
14631 int16x8_t test_vqshlq_n_s16(int16x8_t a) {
14632   return vqshlq_n_s16(a, 0);
14633 }
14634 
14635 // CHECK-LABEL: @test_vqshl_n_s32(
14636 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14637 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14638 // CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
14639 // CHECK:   ret <2 x i32> [[VQSHL_N1]]
14640 int32x2_t test_vqshl_n_s32(int32x2_t a) {
14641   return vqshl_n_s32(a, 0);
14642 }
14643 
14644 // CHECK-LABEL: @test_vqshlq_n_s32(
14645 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14646 // CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14647 // CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
14648 // CHECK:   ret <4 x i32> [[VQSHL_N1]]
14649 int32x4_t test_vqshlq_n_s32(int32x4_t a) {
14650   return vqshlq_n_s32(a, 0);
14651 }
14652 
14653 // CHECK-LABEL: @test_vqshlq_n_s64(
14654 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14655 // CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14656 // CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
14657 // CHECK:   ret <2 x i64> [[VQSHL_N1]]
14658 int64x2_t test_vqshlq_n_s64(int64x2_t a) {
14659   return vqshlq_n_s64(a, 0);
14660 }
14661 
14662 // CHECK-LABEL: @test_vqshl_n_u8(
14663 // CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
14664 // CHECK:   ret <8 x i8> [[VQSHL_N]]
14665 uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
14666   return vqshl_n_u8(a, 0);
14667 }
14668 
14669 // CHECK-LABEL: @test_vqshlq_n_u8(
14670 // CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
14671 // CHECK:   ret <16 x i8> [[VQSHL_N]]
14672 uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
14673   return vqshlq_n_u8(a, 0);
14674 }
14675 
14676 // CHECK-LABEL: @test_vqshl_n_u16(
14677 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14678 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14679 // CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
14680 // CHECK:   ret <4 x i16> [[VQSHL_N1]]
14681 uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
14682   return vqshl_n_u16(a, 0);
14683 }
14684 
14685 // CHECK-LABEL: @test_vqshlq_n_u16(
14686 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14687 // CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14688 // CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
14689 // CHECK:   ret <8 x i16> [[VQSHL_N1]]
14690 uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
14691   return vqshlq_n_u16(a, 0);
14692 }
14693 
14694 // CHECK-LABEL: @test_vqshl_n_u32(
14695 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14696 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14697 // CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
14698 // CHECK:   ret <2 x i32> [[VQSHL_N1]]
14699 uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
14700   return vqshl_n_u32(a, 0);
14701 }
14702 
14703 // CHECK-LABEL: @test_vqshlq_n_u32(
14704 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14705 // CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14706 // CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
14707 // CHECK:   ret <4 x i32> [[VQSHL_N1]]
14708 uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
14709   return vqshlq_n_u32(a, 0);
14710 }
14711 
14712 // CHECK-LABEL: @test_vqshlq_n_u64(
14713 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14714 // CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14715 // CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
14716 // CHECK:   ret <2 x i64> [[VQSHL_N1]]
14717 uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
14718   return vqshlq_n_u64(a, 0);
14719 }
14720 
14721 // CHECK-LABEL: @test_vqshl_n_s64(
14722 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14723 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14724 // CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
14725 // CHECK:   ret <1 x i64> [[VQSHL_N1]]
14726 int64x1_t test_vqshl_n_s64(int64x1_t a) {
14727   return vqshl_n_s64(a, 1);
14728 }
14729 
14730 // CHECK-LABEL: @test_vqshlb_n_u8(
14731 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
14732 // CHECK:   [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
14733 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
14734 // CHECK:   ret i8 [[TMP1]]
14735 uint8_t test_vqshlb_n_u8(uint8_t a) {
14736   return (uint8_t)vqshlb_n_u8(a, 7);
14737 }
14738 
14739 // CHECK-LABEL: @test_vqshlh_n_u16(
14740 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
14741 // CHECK:   [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
14742 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
14743 // CHECK:   ret i16 [[TMP1]]
14744 uint16_t test_vqshlh_n_u16(uint16_t a) {
14745   return (uint16_t)vqshlh_n_u16(a, 15);
14746 }
14747 
14748 // CHECK-LABEL: @test_vqshls_n_u32(
14749 // CHECK:   [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
14750 // CHECK:   ret i32 [[VQSHLS_N_U32]]
14751 uint32_t test_vqshls_n_u32(uint32_t a) {
14752   return (uint32_t)vqshls_n_u32(a, 31);
14753 }
14754 
14755 // CHECK-LABEL: @test_vqshld_n_u64(
14756 // CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
14757 // CHECK:   ret i64 [[VQSHL_N]]
14758 uint64_t test_vqshld_n_u64(uint64_t a) {
14759   return (uint64_t)vqshld_n_u64(a, 63);
14760 }
14761 
14762 // CHECK-LABEL: @test_vqshl_n_u64(
14763 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14764 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14765 // CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
14766 // CHECK:   ret <1 x i64> [[VQSHL_N1]]
14767 uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
14768   return vqshl_n_u64(a, 1);
14769 }
14770 
14771 // CHECK-LABEL: @test_vqshlub_n_s8(
14772 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
14773 // CHECK:   [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
14774 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
14775 // CHECK:   ret i8 [[TMP1]]
14776 int8_t test_vqshlub_n_s8(int8_t a) {
14777   return (int8_t)vqshlub_n_s8(a, 7);
14778 }
14779 
14780 // CHECK-LABEL: @test_vqshluh_n_s16(
14781 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
14782 // CHECK:   [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
14783 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
14784 // CHECK:   ret i16 [[TMP1]]
14785 int16_t test_vqshluh_n_s16(int16_t a) {
14786   return (int16_t)vqshluh_n_s16(a, 15);
14787 }
14788 
14789 // CHECK-LABEL: @test_vqshlus_n_s32(
14790 // CHECK:   [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
14791 // CHECK:   ret i32 [[VQSHLUS_N_S32]]
14792 int32_t test_vqshlus_n_s32(int32_t a) {
14793   return (int32_t)vqshlus_n_s32(a, 31);
14794 }
14795 
14796 // CHECK-LABEL: @test_vqshlud_n_s64(
14797 // CHECK:   [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
14798 // CHECK:   ret i64 [[VQSHLU_N]]
14799 int64_t test_vqshlud_n_s64(int64_t a) {
14800   return (int64_t)vqshlud_n_s64(a, 63);
14801 }
14802 
14803 // CHECK-LABEL: @test_vqshlu_n_s64(
14804 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14805 // CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14806 // CHECK:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
14807 // CHECK:   ret <1 x i64> [[VQSHLU_N1]]
14808 uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
14809   return vqshlu_n_s64(a, 1);
14810 }
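// Shift-right/left insert: scalar d-forms bitcast their operands to <1 x i64> around the vsri/vsli intrinsic.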
14811 
14812 // CHECK-LABEL: @test_vsrid_n_s64(
14813 // CHECK:   [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
14814 // CHECK:   [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
14815 // CHECK:   [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
14816 // CHECK:   [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
14817 // CHECK:   ret i64 [[VSRID_N_S643]]
14818 int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
14819   return (int64_t)vsrid_n_s64(a, b, 63);
14820 }
14821 
14822 // CHECK-LABEL: @test_vsri_n_s64(
14823 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14824 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14825 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14826 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14827 // CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
14828 // CHECK:   ret <1 x i64> [[VSRI_N2]]
14829 int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
14830   return vsri_n_s64(a, b, 1);
14831 }
14832 
14833 // CHECK-LABEL: @test_vsrid_n_u64(
14834 // CHECK:   [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
14835 // CHECK:   [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
14836 // CHECK:   [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
14837 // CHECK:   [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
14838 // CHECK:   ret i64 [[VSRID_N_U643]]
14839 uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
14840   return (uint64_t)vsrid_n_u64(a, b, 63);
14841 }
14842 
14843 // CHECK-LABEL: @test_vsri_n_u64(
14844 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14845 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14846 // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14847 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14848 // CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
14849 // CHECK:   ret <1 x i64> [[VSRI_N2]]
14850 uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
14851   return vsri_n_u64(a, b, 1);
14852 }
14853 
14854 // CHECK-LABEL: @test_vslid_n_s64(
14855 // CHECK:   [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
14856 // CHECK:   [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
14857 // CHECK:   [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
14858 // CHECK:   [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
14859 // CHECK:   ret i64 [[VSLID_N_S643]]
14860 int64_t test_vslid_n_s64(int64_t a, int64_t b) {
14861   return (int64_t)vslid_n_s64(a, b, 63);
14862 }
14863 
14864 // CHECK-LABEL: @test_vsli_n_s64(
14865 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14866 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14867 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14868 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14869 // CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
14870 // CHECK:   ret <1 x i64> [[VSLI_N2]]
14871 int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
14872   return vsli_n_s64(a, b, 1);
14873 }
14874 
14875 // CHECK-LABEL: @test_vslid_n_u64(
14876 // CHECK:   [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
14877 // CHECK:   [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
14878 // CHECK:   [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
14879 // CHECK:   [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
14880 // CHECK:   ret i64 [[VSLID_N_U643]]
14881 uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
14882   return (uint64_t)vslid_n_u64(a, b, 63);
14883 }
14884 
14885 // CHECK-LABEL: @test_vsli_n_u64(
14886 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14887 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14888 // CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14889 // CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14890 // CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
14891 // CHECK:   ret <1 x i64> [[VSLI_N2]]
14892 uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
14893   return vsli_n_u64(a, b, 1);
14894 }
14895 
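// Saturating (rounding) shift-right-narrow by immediate; the d-forms use dedicated scalar narrowing intrinsics.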
14896 // CHECK-LABEL: @test_vqshrnh_n_s16(
14897 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14898 // CHECK:   [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14899 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
14900 // CHECK:   ret i8 [[TMP1]]
14901 int8_t test_vqshrnh_n_s16(int16_t a) {
14902   return (int8_t)vqshrnh_n_s16(a, 8);
14903 }
14904 
14905 // CHECK-LABEL: @test_vqshrns_n_s32(
14906 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14907 // CHECK:   [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14908 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
14909 // CHECK:   ret i16 [[TMP1]]
14910 int16_t test_vqshrns_n_s32(int32_t a) {
14911   return (int16_t)vqshrns_n_s32(a, 16);
14912 }
14913 
14914 // CHECK-LABEL: @test_vqshrnd_n_s64(
14915 // CHECK:   [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
14916 // CHECK:   ret i32 [[VQSHRND_N_S64]]
14917 int32_t test_vqshrnd_n_s64(int64_t a) {
14918   return (int32_t)vqshrnd_n_s64(a, 32);
14919 }
14920 
14921 // CHECK-LABEL: @test_vqshrnh_n_u16(
14922 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14923 // CHECK:   [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14924 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
14925 // CHECK:   ret i8 [[TMP1]]
14926 uint8_t test_vqshrnh_n_u16(uint16_t a) {
14927   return (uint8_t)vqshrnh_n_u16(a, 8);
14928 }
14929 
14930 // CHECK-LABEL: @test_vqshrns_n_u32(
14931 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14932 // CHECK:   [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14933 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
14934 // CHECK:   ret i16 [[TMP1]]
14935 uint16_t test_vqshrns_n_u32(uint32_t a) {
14936   return (uint16_t)vqshrns_n_u32(a, 16);
14937 }
14938 
14939 // CHECK-LABEL: @test_vqshrnd_n_u64(
14940 // CHECK:   [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
14941 // CHECK:   ret i32 [[VQSHRND_N_U64]]
14942 uint32_t test_vqshrnd_n_u64(uint64_t a) {
14943   return (uint32_t)vqshrnd_n_u64(a, 32);
14944 }
14945 
14946 // CHECK-LABEL: @test_vqrshrnh_n_s16(
14947 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14948 // CHECK:   [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14949 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
14950 // CHECK:   ret i8 [[TMP1]]
14951 int8_t test_vqrshrnh_n_s16(int16_t a) {
14952   return (int8_t)vqrshrnh_n_s16(a, 8);
14953 }
14954 
14955 // CHECK-LABEL: @test_vqrshrns_n_s32(
14956 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14957 // CHECK:   [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14958 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
14959 // CHECK:   ret i16 [[TMP1]]
14960 int16_t test_vqrshrns_n_s32(int32_t a) {
14961   return (int16_t)vqrshrns_n_s32(a, 16);
14962 }
14963 
14964 // CHECK-LABEL: @test_vqrshrnd_n_s64(
14965 // CHECK:   [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
14966 // CHECK:   ret i32 [[VQRSHRND_N_S64]]
14967 int32_t test_vqrshrnd_n_s64(int64_t a) {
14968   return (int32_t)vqrshrnd_n_s64(a, 32);
14969 }
14970 
14971 // CHECK-LABEL: @test_vqrshrnh_n_u16(
14972 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14973 // CHECK:   [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14974 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
14975 // CHECK:   ret i8 [[TMP1]]
14976 uint8_t test_vqrshrnh_n_u16(uint16_t a) {
14977   return (uint8_t)vqrshrnh_n_u16(a, 8);
14978 }
14979 
14980 // CHECK-LABEL: @test_vqrshrns_n_u32(
14981 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14982 // CHECK:   [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14983 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
14984 // CHECK:   ret i16 [[TMP1]]
14985 uint16_t test_vqrshrns_n_u32(uint32_t a) {
14986   return (uint16_t)vqrshrns_n_u32(a, 16);
14987 }
14988 
14989 // CHECK-LABEL: @test_vqrshrnd_n_u64(
14990 // CHECK:   [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
14991 // CHECK:   ret i32 [[VQRSHRND_N_U64]]
14992 uint32_t test_vqrshrnd_n_u64(uint64_t a) {
14993   return (uint32_t)vqrshrnd_n_u64(a, 32);
14994 }
14995 
14996 // CHECK-LABEL: @test_vqshrunh_n_s16(
14997 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14998 // CHECK:   [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
14999 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
15000 // CHECK:   ret i8 [[TMP1]]
15001 int8_t test_vqshrunh_n_s16(int16_t a) {
15002   return (int8_t)vqshrunh_n_s16(a, 8);
15003 }
15004 
15005 // CHECK-LABEL: @test_vqshruns_n_s32(
15006 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
15007 // CHECK:   [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
15008 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
15009 // CHECK:   ret i16 [[TMP1]]
15010 int16_t test_vqshruns_n_s32(int32_t a) {
15011   return (int16_t)vqshruns_n_s32(a, 16);
15012 }
15013 
15014 // CHECK-LABEL: @test_vqshrund_n_s64(
15015 // CHECK:   [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
15016 // CHECK:   ret i32 [[VQSHRUND_N_S64]]
15017 int32_t test_vqshrund_n_s64(int64_t a) {
15018   return (int32_t)vqshrund_n_s64(a, 32);
15019 }
15020 
15021 // CHECK-LABEL: @test_vqrshrunh_n_s16(
15022 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
15023 // CHECK:   [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
15024 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
15025 // CHECK:   ret i8 [[TMP1]]
15026 int8_t test_vqrshrunh_n_s16(int16_t a) {
15027   return (int8_t)vqrshrunh_n_s16(a, 8);
15028 }
15029 
15030 // CHECK-LABEL: @test_vqrshruns_n_s32(
15031 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
15032 // CHECK:   [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
15033 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
15034 // CHECK:   ret i16 [[TMP1]]
15035 int16_t test_vqrshruns_n_s32(int32_t a) {
15036   return (int16_t)vqrshruns_n_s32(a, 16);
15037 }
15038 
15039 // CHECK-LABEL: @test_vqrshrund_n_s64(
15040 // CHECK:   [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
15041 // CHECK:   ret i32 [[VQRSHRUND_N_S64]]
15042 int32_t test_vqrshrund_n_s64(int64_t a) {
15043   return (int32_t)vqrshrund_n_s64(a, 32);
15044 }
15045 
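// Fixed-point <-> floating-point conversions; the immediate operand is the number of fractional bits.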
15046 // CHECK-LABEL: @test_vcvts_n_f32_s32(
15047 // CHECK:   [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
15048 // CHECK:   ret float [[VCVTS_N_F32_S32]]
15049 float32_t test_vcvts_n_f32_s32(int32_t a) {
15050   return vcvts_n_f32_s32(a, 1);
15051 }
15052 
15053 // CHECK-LABEL: @test_vcvtd_n_f64_s64(
15054 // CHECK:   [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
15055 // CHECK:   ret double [[VCVTD_N_F64_S64]]
15056 float64_t test_vcvtd_n_f64_s64(int64_t a) {
15057   return vcvtd_n_f64_s64(a, 1);
15058 }
15059 
15060 // CHECK-LABEL: @test_vcvts_n_f32_u32(
15061 // CHECK:   [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
15062 // CHECK:   ret float [[VCVTS_N_F32_U32]]
15063 float32_t test_vcvts_n_f32_u32(uint32_t a) {
15064   return vcvts_n_f32_u32(a, 32);
15065 }
15066 
15067 // CHECK-LABEL: @test_vcvtd_n_f64_u64(
15068 // CHECK:   [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
15069 // CHECK:   ret double [[VCVTD_N_F64_U64]]
15070 float64_t test_vcvtd_n_f64_u64(uint64_t a) {
15071   return vcvtd_n_f64_u64(a, 64);
15072 }
15073 
15074 // CHECK-LABEL: @test_vcvts_n_s32_f32(
15075 // CHECK:   [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
15076 // CHECK:   ret i32 [[VCVTS_N_S32_F32]]
15077 int32_t test_vcvts_n_s32_f32(float32_t a) {
15078   return (int32_t)vcvts_n_s32_f32(a, 1);
15079 }
15080 
15081 // CHECK-LABEL: @test_vcvtd_n_s64_f64(
15082 // CHECK:   [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
15083 // CHECK:   ret i64 [[VCVTD_N_S64_F64]]
15084 int64_t test_vcvtd_n_s64_f64(float64_t a) {
15085   return (int64_t)vcvtd_n_s64_f64(a, 1);
15086 }
15087 
15088 // CHECK-LABEL: @test_vcvts_n_u32_f32(
15089 // CHECK:   [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
15090 // CHECK:   ret i32 [[VCVTS_N_U32_F32]]
15091 uint32_t test_vcvts_n_u32_f32(float32_t a) {
15092   return (uint32_t)vcvts_n_u32_f32(a, 32);
15093 }
15094 
15095 // CHECK-LABEL: @test_vcvtd_n_u64_f64(
15096 // CHECK:   [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
15097 // CHECK:   ret i64 [[VCVTD_N_U64_F64]]
15098 uint64_t test_vcvtd_n_u64_f64(float64_t a) {
15099   return (uint64_t)vcvtd_n_u64_f64(a, 64);
15100 }
15101 
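// vreinterpret between same-sized vector types is a plain bitcast, or a no-op when the IR types already match.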
15102 // CHECK-LABEL: @test_vreinterpret_s8_s16(
15103 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15104 // CHECK:   ret <8 x i8> [[TMP0]]
15105 int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
15106   return vreinterpret_s8_s16(a);
15107 }
15108 
15109 // CHECK-LABEL: @test_vreinterpret_s8_s32(
15110 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15111 // CHECK:   ret <8 x i8> [[TMP0]]
15112 int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
15113   return vreinterpret_s8_s32(a);
15114 }
15115 
15116 // CHECK-LABEL: @test_vreinterpret_s8_s64(
15117 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15118 // CHECK:   ret <8 x i8> [[TMP0]]
15119 int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
15120   return vreinterpret_s8_s64(a);
15121 }
15122 
15123 // CHECK-LABEL: @test_vreinterpret_s8_u8(
15124 // CHECK:   ret <8 x i8> %a
15125 int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
15126   return vreinterpret_s8_u8(a);
15127 }
15128 
15129 // CHECK-LABEL: @test_vreinterpret_s8_u16(
15130 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15131 // CHECK:   ret <8 x i8> [[TMP0]]
15132 int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
15133   return vreinterpret_s8_u16(a);
15134 }
15135 
15136 // CHECK-LABEL: @test_vreinterpret_s8_u32(
15137 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15138 // CHECK:   ret <8 x i8> [[TMP0]]
15139 int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
15140   return vreinterpret_s8_u32(a);
15141 }
15142 
15143 // CHECK-LABEL: @test_vreinterpret_s8_u64(
15144 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15145 // CHECK:   ret <8 x i8> [[TMP0]]
15146 int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
15147   return vreinterpret_s8_u64(a);
15148 }
15149 
15150 // CHECK-LABEL: @test_vreinterpret_s8_f16(
15151 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
15152 // CHECK:   ret <8 x i8> [[TMP0]]
15153 int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
15154   return vreinterpret_s8_f16(a);
15155 }
15156 
15157 // CHECK-LABEL: @test_vreinterpret_s8_f32(
15158 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
15159 // CHECK:   ret <8 x i8> [[TMP0]]
15160 int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
15161   return vreinterpret_s8_f32(a);
15162 }
15163 
15164 // CHECK-LABEL: @test_vreinterpret_s8_f64(
15165 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
15166 // CHECK:   ret <8 x i8> [[TMP0]]
15167 int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
15168   return vreinterpret_s8_f64(a);
15169 }
15170 
15171 // CHECK-LABEL: @test_vreinterpret_s8_p8(
15172 // CHECK:   ret <8 x i8> %a
15173 int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
15174   return vreinterpret_s8_p8(a);
15175 }
15176 
15177 // CHECK-LABEL: @test_vreinterpret_s8_p16(
15178 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15179 // CHECK:   ret <8 x i8> [[TMP0]]
15180 int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
15181   return vreinterpret_s8_p16(a);
15182 }
15183 
15184 // CHECK-LABEL: @test_vreinterpret_s8_p64(
15185 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15186 // CHECK:   ret <8 x i8> [[TMP0]]
15187 int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
15188   return vreinterpret_s8_p64(a);
15189 }
15190 
15191 // CHECK-LABEL: @test_vreinterpret_s16_s8(
15192 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15193 // CHECK:   ret <4 x i16> [[TMP0]]
15194 int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
15195   return vreinterpret_s16_s8(a);
15196 }
15197 
15198 // CHECK-LABEL: @test_vreinterpret_s16_s32(
15199 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
15200 // CHECK:   ret <4 x i16> [[TMP0]]
15201 int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
15202   return vreinterpret_s16_s32(a);
15203 }
15204 
15205 // CHECK-LABEL: @test_vreinterpret_s16_s64(
15206 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15207 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s64(int64x1_t a)15208 int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
15209   return vreinterpret_s16_s64(a);
15210 }
15211 
15212 // CHECK-LABEL: @test_vreinterpret_s16_u8(
15213 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15214 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u8(uint8x8_t a)15215 int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
15216   return vreinterpret_s16_u8(a);
15217 }
15218 
15219 // CHECK-LABEL: @test_vreinterpret_s16_u16(
15220 // CHECK:   ret <4 x i16> %a
test_vreinterpret_s16_u16(uint16x4_t a)15221 int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
15222   return vreinterpret_s16_u16(a);
15223 }
15224 
15225 // CHECK-LABEL: @test_vreinterpret_s16_u32(
15226 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
15227 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u32(uint32x2_t a)15228 int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
15229   return vreinterpret_s16_u32(a);
15230 }
15231 
15232 // CHECK-LABEL: @test_vreinterpret_s16_u64(
15233 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15234 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u64(uint64x1_t a)15235 int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
15236   return vreinterpret_s16_u64(a);
15237 }
15238 
15239 // CHECK-LABEL: @test_vreinterpret_s16_f16(
15240 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
15241 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_f16(float16x4_t a)15242 int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
15243   return vreinterpret_s16_f16(a);
15244 }
15245 
15246 // CHECK-LABEL: @test_vreinterpret_s16_f32(
15247 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
15248 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_f32(float32x2_t a)15249 int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
15250   return vreinterpret_s16_f32(a);
15251 }
15252 
15253 // CHECK-LABEL: @test_vreinterpret_s16_f64(
15254 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
15255 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_f64(float64x1_t a)15256 int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
15257   return vreinterpret_s16_f64(a);
15258 }
15259 
15260 // CHECK-LABEL: @test_vreinterpret_s16_p8(
15261 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15262 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_p8(poly8x8_t a)15263 int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
15264   return vreinterpret_s16_p8(a);
15265 }
15266 
15267 // CHECK-LABEL: @test_vreinterpret_s16_p16(
15268 // CHECK:   ret <4 x i16> %a
test_vreinterpret_s16_p16(poly16x4_t a)15269 int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
15270   return vreinterpret_s16_p16(a);
15271 }
15272 
15273 // CHECK-LABEL: @test_vreinterpret_s16_p64(
15274 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15275 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_p64(poly64x1_t a)15276 int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
15277   return vreinterpret_s16_p64(a);
15278 }
15279 
15280 // CHECK-LABEL: @test_vreinterpret_s32_s8(
15281 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
15282 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s8(int8x8_t a)15283 int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
15284   return vreinterpret_s32_s8(a);
15285 }
15286 
15287 // CHECK-LABEL: @test_vreinterpret_s32_s16(
15288 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
15289 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s16(int16x4_t a)15290 int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
15291   return vreinterpret_s32_s16(a);
15292 }
15293 
15294 // CHECK-LABEL: @test_vreinterpret_s32_s64(
15295 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
15296 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s64(int64x1_t a)15297 int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
15298   return vreinterpret_s32_s64(a);
15299 }
15300 
15301 // CHECK-LABEL: @test_vreinterpret_s32_u8(
15302 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
15303 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u8(uint8x8_t a)15304 int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
15305   return vreinterpret_s32_u8(a);
15306 }
15307 
15308 // CHECK-LABEL: @test_vreinterpret_s32_u16(
15309 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
15310 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u16(uint16x4_t a)15311 int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
15312   return vreinterpret_s32_u16(a);
15313 }
15314 
15315 // CHECK-LABEL: @test_vreinterpret_s32_u32(
15316 // CHECK:   ret <2 x i32> %a
test_vreinterpret_s32_u32(uint32x2_t a)15317 int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
15318   return vreinterpret_s32_u32(a);
15319 }
15320 
15321 // CHECK-LABEL: @test_vreinterpret_s32_u64(
15322 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
15323 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u64(uint64x1_t a)15324 int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
15325   return vreinterpret_s32_u64(a);
15326 }
15327 
15328 // CHECK-LABEL: @test_vreinterpret_s32_f16(
15329 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
15330 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_f16(float16x4_t a)15331 int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
15332   return vreinterpret_s32_f16(a);
15333 }
15334 
15335 // CHECK-LABEL: @test_vreinterpret_s32_f32(
15336 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
15337 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_f32(float32x2_t a)15338 int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
15339   return vreinterpret_s32_f32(a);
15340 }
15341 
15342 // CHECK-LABEL: @test_vreinterpret_s32_f64(
15343 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
15344 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_f64(float64x1_t a)15345 int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
15346   return vreinterpret_s32_f64(a);
15347 }
15348 
15349 // CHECK-LABEL: @test_vreinterpret_s32_p8(
15350 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
15351 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_p8(poly8x8_t a)15352 int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
15353   return vreinterpret_s32_p8(a);
15354 }
15355 
15356 // CHECK-LABEL: @test_vreinterpret_s32_p16(
15357 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
15358 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_p16(poly16x4_t a)15359 int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
15360   return vreinterpret_s32_p16(a);
15361 }
15362 
15363 // CHECK-LABEL: @test_vreinterpret_s32_p64(
15364 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
15365 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_p64(poly64x1_t a)15366 int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
15367   return vreinterpret_s32_p64(a);
15368 }
15369 
15370 // CHECK-LABEL: @test_vreinterpret_s64_s8(
15371 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15372 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s8(int8x8_t a)15373 int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
15374   return vreinterpret_s64_s8(a);
15375 }
15376 
15377 // CHECK-LABEL: @test_vreinterpret_s64_s16(
15378 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15379 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s16(int16x4_t a)15380 int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
15381   return vreinterpret_s64_s16(a);
15382 }
15383 
15384 // CHECK-LABEL: @test_vreinterpret_s64_s32(
15385 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
15386 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s32(int32x2_t a)15387 int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
15388   return vreinterpret_s64_s32(a);
15389 }
15390 
15391 // CHECK-LABEL: @test_vreinterpret_s64_u8(
15392 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15393 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u8(uint8x8_t a)15394 int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
15395   return vreinterpret_s64_u8(a);
15396 }
15397 
15398 // CHECK-LABEL: @test_vreinterpret_s64_u16(
15399 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15400 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u16(uint16x4_t a)15401 int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
15402   return vreinterpret_s64_u16(a);
15403 }
15404 
15405 // CHECK-LABEL: @test_vreinterpret_s64_u32(
15406 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
15407 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u32(uint32x2_t a)15408 int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
15409   return vreinterpret_s64_u32(a);
15410 }
15411 
15412 // CHECK-LABEL: @test_vreinterpret_s64_u64(
15413 // CHECK:   ret <1 x i64> %a
test_vreinterpret_s64_u64(uint64x1_t a)15414 int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
15415   return vreinterpret_s64_u64(a);
15416 }
15417 
15418 // CHECK-LABEL: @test_vreinterpret_s64_f16(
15419 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
15420 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f16(float16x4_t a)15421 int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
15422   return vreinterpret_s64_f16(a);
15423 }
15424 
15425 // CHECK-LABEL: @test_vreinterpret_s64_f32(
15426 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
15427 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f32(float32x2_t a)15428 int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
15429   return vreinterpret_s64_f32(a);
15430 }
15431 
15432 // CHECK-LABEL: @test_vreinterpret_s64_f64(
15433 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
15434 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f64(float64x1_t a)15435 int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
15436   return vreinterpret_s64_f64(a);
15437 }
15438 
15439 // CHECK-LABEL: @test_vreinterpret_s64_p8(
15440 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15441 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p8(poly8x8_t a)15442 int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
15443   return vreinterpret_s64_p8(a);
15444 }
15445 
15446 // CHECK-LABEL: @test_vreinterpret_s64_p16(
15447 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15448 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p16(poly16x4_t a)15449 int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
15450   return vreinterpret_s64_p16(a);
15451 }
15452 
15453 // CHECK-LABEL: @test_vreinterpret_s64_p64(
15454 // CHECK:   ret <1 x i64> %a
test_vreinterpret_s64_p64(poly64x1_t a)15455 int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
15456   return vreinterpret_s64_p64(a);
15457 }
15458 
15459 // CHECK-LABEL: @test_vreinterpret_u8_s8(
15460 // CHECK:   ret <8 x i8> %a
test_vreinterpret_u8_s8(int8x8_t a)15461 uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
15462   return vreinterpret_u8_s8(a);
15463 }
15464 
15465 // CHECK-LABEL: @test_vreinterpret_u8_s16(
15466 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15467 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s16(int16x4_t a)15468 uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
15469   return vreinterpret_u8_s16(a);
15470 }
15471 
15472 // CHECK-LABEL: @test_vreinterpret_u8_s32(
15473 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15474 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s32(int32x2_t a)15475 uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
15476   return vreinterpret_u8_s32(a);
15477 }
15478 
15479 // CHECK-LABEL: @test_vreinterpret_u8_s64(
15480 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15481 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s64(int64x1_t a)15482 uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
15483   return vreinterpret_u8_s64(a);
15484 }
15485 
15486 // CHECK-LABEL: @test_vreinterpret_u8_u16(
15487 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15488 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u16(uint16x4_t a)15489 uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
15490   return vreinterpret_u8_u16(a);
15491 }
15492 
15493 // CHECK-LABEL: @test_vreinterpret_u8_u32(
15494 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15495 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u32(uint32x2_t a)15496 uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
15497   return vreinterpret_u8_u32(a);
15498 }
15499 
15500 // CHECK-LABEL: @test_vreinterpret_u8_u64(
15501 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15502 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u64(uint64x1_t a)15503 uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
15504   return vreinterpret_u8_u64(a);
15505 }
15506 
15507 // CHECK-LABEL: @test_vreinterpret_u8_f16(
15508 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
15509 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f16(float16x4_t a)15510 uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
15511   return vreinterpret_u8_f16(a);
15512 }
15513 
15514 // CHECK-LABEL: @test_vreinterpret_u8_f32(
15515 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
15516 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f32(float32x2_t a)15517 uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
15518   return vreinterpret_u8_f32(a);
15519 }
15520 
15521 // CHECK-LABEL: @test_vreinterpret_u8_f64(
15522 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
15523 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f64(float64x1_t a)15524 uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
15525   return vreinterpret_u8_f64(a);
15526 }
15527 
15528 // CHECK-LABEL: @test_vreinterpret_u8_p8(
15529 // CHECK:   ret <8 x i8> %a
test_vreinterpret_u8_p8(poly8x8_t a)15530 uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
15531   return vreinterpret_u8_p8(a);
15532 }
15533 
15534 // CHECK-LABEL: @test_vreinterpret_u8_p16(
15535 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15536 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_p16(poly16x4_t a)15537 uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
15538   return vreinterpret_u8_p16(a);
15539 }
15540 
15541 // CHECK-LABEL: @test_vreinterpret_u8_p64(
15542 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15543 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_p64(poly64x1_t a)15544 uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
15545   return vreinterpret_u8_p64(a);
15546 }
15547 
15548 // CHECK-LABEL: @test_vreinterpret_u16_s8(
15549 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15550 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s8(int8x8_t a)15551 uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
15552   return vreinterpret_u16_s8(a);
15553 }
15554 
15555 // CHECK-LABEL: @test_vreinterpret_u16_s16(
15556 // CHECK:   ret <4 x i16> %a
test_vreinterpret_u16_s16(int16x4_t a)15557 uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
15558   return vreinterpret_u16_s16(a);
15559 }
15560 
15561 // CHECK-LABEL: @test_vreinterpret_u16_s32(
15562 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
15563 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s32(int32x2_t a)15564 uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
15565   return vreinterpret_u16_s32(a);
15566 }
15567 
15568 // CHECK-LABEL: @test_vreinterpret_u16_s64(
15569 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15570 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s64(int64x1_t a)15571 uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
15572   return vreinterpret_u16_s64(a);
15573 }
15574 
15575 // CHECK-LABEL: @test_vreinterpret_u16_u8(
15576 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15577 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u8(uint8x8_t a)15578 uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
15579   return vreinterpret_u16_u8(a);
15580 }
15581 
15582 // CHECK-LABEL: @test_vreinterpret_u16_u32(
15583 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
15584 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u32(uint32x2_t a)15585 uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
15586   return vreinterpret_u16_u32(a);
15587 }
15588 
15589 // CHECK-LABEL: @test_vreinterpret_u16_u64(
15590 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15591 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u64(uint64x1_t a)15592 uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
15593   return vreinterpret_u16_u64(a);
15594 }
15595 
15596 // CHECK-LABEL: @test_vreinterpret_u16_f16(
15597 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
15598 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f16(float16x4_t a)15599 uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
15600   return vreinterpret_u16_f16(a);
15601 }
15602 
15603 // CHECK-LABEL: @test_vreinterpret_u16_f32(
15604 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
15605 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f32(float32x2_t a)15606 uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
15607   return vreinterpret_u16_f32(a);
15608 }
15609 
15610 // CHECK-LABEL: @test_vreinterpret_u16_f64(
15611 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
15612 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f64(float64x1_t a)15613 uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
15614   return vreinterpret_u16_f64(a);
15615 }
15616 
15617 // CHECK-LABEL: @test_vreinterpret_u16_p8(
15618 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15619 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_p8(poly8x8_t a)15620 uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
15621   return vreinterpret_u16_p8(a);
15622 }
15623 
15624 // CHECK-LABEL: @test_vreinterpret_u16_p16(
15625 // CHECK:   ret <4 x i16> %a
test_vreinterpret_u16_p16(poly16x4_t a)15626 uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
15627   return vreinterpret_u16_p16(a);
15628 }
15629 
15630 // CHECK-LABEL: @test_vreinterpret_u16_p64(
15631 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15632 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_p64(poly64x1_t a)15633 uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
15634   return vreinterpret_u16_p64(a);
15635 }
15636 
15637 // CHECK-LABEL: @test_vreinterpret_u32_s8(
15638 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
15639 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s8(int8x8_t a)15640 uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
15641   return vreinterpret_u32_s8(a);
15642 }
15643 
15644 // CHECK-LABEL: @test_vreinterpret_u32_s16(
15645 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
15646 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s16(int16x4_t a)15647 uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
15648   return vreinterpret_u32_s16(a);
15649 }
15650 
15651 // CHECK-LABEL: @test_vreinterpret_u32_s32(
15652 // CHECK:   ret <2 x i32> %a
test_vreinterpret_u32_s32(int32x2_t a)15653 uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
15654   return vreinterpret_u32_s32(a);
15655 }
15656 
15657 // CHECK-LABEL: @test_vreinterpret_u32_s64(
15658 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
15659 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s64(int64x1_t a)15660 uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
15661   return vreinterpret_u32_s64(a);
15662 }
15663 
15664 // CHECK-LABEL: @test_vreinterpret_u32_u8(
15665 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
15666 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u8(uint8x8_t a)15667 uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
15668   return vreinterpret_u32_u8(a);
15669 }
15670 
15671 // CHECK-LABEL: @test_vreinterpret_u32_u16(
15672 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
15673 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u16(uint16x4_t a)15674 uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
15675   return vreinterpret_u32_u16(a);
15676 }
15677 
15678 // CHECK-LABEL: @test_vreinterpret_u32_u64(
15679 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
15680 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u64(uint64x1_t a)15681 uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
15682   return vreinterpret_u32_u64(a);
15683 }
15684 
15685 // CHECK-LABEL: @test_vreinterpret_u32_f16(
15686 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
15687 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f16(float16x4_t a)15688 uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
15689   return vreinterpret_u32_f16(a);
15690 }
15691 
15692 // CHECK-LABEL: @test_vreinterpret_u32_f32(
15693 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
15694 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f32(float32x2_t a)15695 uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
15696   return vreinterpret_u32_f32(a);
15697 }
15698 
15699 // CHECK-LABEL: @test_vreinterpret_u32_f64(
15700 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
15701 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f64(float64x1_t a)15702 uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
15703   return vreinterpret_u32_f64(a);
15704 }
15705 
15706 // CHECK-LABEL: @test_vreinterpret_u32_p8(
15707 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
15708 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p8(poly8x8_t a)15709 uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
15710   return vreinterpret_u32_p8(a);
15711 }
15712 
15713 // CHECK-LABEL: @test_vreinterpret_u32_p16(
15714 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
15715 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p16(poly16x4_t a)15716 uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
15717   return vreinterpret_u32_p16(a);
15718 }
15719 
15720 // CHECK-LABEL: @test_vreinterpret_u32_p64(
15721 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
15722 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p64(poly64x1_t a)15723 uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
15724   return vreinterpret_u32_p64(a);
15725 }
15726 
15727 // CHECK-LABEL: @test_vreinterpret_u64_s8(
15728 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15729 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s8(int8x8_t a)15730 uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
15731   return vreinterpret_u64_s8(a);
15732 }
15733 
15734 // CHECK-LABEL: @test_vreinterpret_u64_s16(
15735 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15736 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s16(int16x4_t a)15737 uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
15738   return vreinterpret_u64_s16(a);
15739 }
15740 
15741 // CHECK-LABEL: @test_vreinterpret_u64_s32(
15742 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
15743 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s32(int32x2_t a)15744 uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
15745   return vreinterpret_u64_s32(a);
15746 }
15747 
15748 // CHECK-LABEL: @test_vreinterpret_u64_s64(
15749 // CHECK:   ret <1 x i64> %a
test_vreinterpret_u64_s64(int64x1_t a)15750 uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
15751   return vreinterpret_u64_s64(a);
15752 }
15753 
15754 // CHECK-LABEL: @test_vreinterpret_u64_u8(
15755 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15756 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u8(uint8x8_t a)15757 uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
15758   return vreinterpret_u64_u8(a);
15759 }
15760 
15761 // CHECK-LABEL: @test_vreinterpret_u64_u16(
15762 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15763 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u16(uint16x4_t a)15764 uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
15765   return vreinterpret_u64_u16(a);
15766 }
15767 
15768 // CHECK-LABEL: @test_vreinterpret_u64_u32(
15769 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
15770 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u32(uint32x2_t a)15771 uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
15772   return vreinterpret_u64_u32(a);
15773 }
15774 
15775 // CHECK-LABEL: @test_vreinterpret_u64_f16(
15776 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
15777 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f16(float16x4_t a)15778 uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
15779   return vreinterpret_u64_f16(a);
15780 }
15781 
15782 // CHECK-LABEL: @test_vreinterpret_u64_f32(
15783 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
15784 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f32(float32x2_t a)15785 uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
15786   return vreinterpret_u64_f32(a);
15787 }
15788 
15789 // CHECK-LABEL: @test_vreinterpret_u64_f64(
15790 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
15791 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f64(float64x1_t a)15792 uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
15793   return vreinterpret_u64_f64(a);
15794 }
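
// An illustrative (non-test) use of the <1 x double> -> <1 x i64> view
// checked just above: recovering the raw IEEE-754 bits of a scalar double.
// The helper name is ours, not part of the intrinsics API.
static inline uint64_t illustrative_double_bits(float64x1_t v) {
  return vget_lane_u64(vreinterpret_u64_f64(v), 0);  // one bitcast, lane 0
}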

// CHECK-LABEL: @test_vreinterpret_u64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p64(
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s64(
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u64(
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}
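
// The q-suffixed forms below behave identically but reinterpret 128-bit
// (quad) vectors, so lane counts double for the same total width. An
// illustrative sketch (not part of the checked tests; the helper name is
// ours): absolute value by masking off sign bits through an integer view.
static inline float32x4_t illustrative_abs_via_bits(float32x4_t v) {
  uint32x4_t bits = vreinterpretq_u32_f32(v);        // 128-bit integer view
  bits = vandq_u32(bits, vdupq_n_u32(0x7fffffffu));  // clear each sign bit
  return vreinterpretq_f32_u32(bits);                // view back as float
}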

// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK:   ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
16586   return vreinterpretq_s32_f16(a);
16587 }
16588 
16589 // CHECK-LABEL: @test_vreinterpretq_s32_f32(
16590 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
16591 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_f32(float32x4_t a)16592 int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
16593   return vreinterpretq_s32_f32(a);
16594 }
16595 
16596 // CHECK-LABEL: @test_vreinterpretq_s32_f64(
16597 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
16598 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_f64(float64x2_t a)16599 int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
16600   return vreinterpretq_s32_f64(a);
16601 }
16602 
16603 // CHECK-LABEL: @test_vreinterpretq_s32_p8(
16604 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16605 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_p8(poly8x16_t a)16606 int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
16607   return vreinterpretq_s32_p8(a);
16608 }
16609 
16610 // CHECK-LABEL: @test_vreinterpretq_s32_p16(
16611 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16612 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_p16(poly16x8_t a)16613 int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
16614   return vreinterpretq_s32_p16(a);
16615 }
16616 
16617 // CHECK-LABEL: @test_vreinterpretq_s32_p64(
16618 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16619 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_p64(poly64x2_t a)16620 int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
16621   return vreinterpretq_s32_p64(a);
16622 }
16623 
16624 // CHECK-LABEL: @test_vreinterpretq_s64_s8(
16625 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16626 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_s8(int8x16_t a)16627 int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
16628   return vreinterpretq_s64_s8(a);
16629 }
16630 
16631 // CHECK-LABEL: @test_vreinterpretq_s64_s16(
16632 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16633 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_s16(int16x8_t a)16634 int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
16635   return vreinterpretq_s64_s16(a);
16636 }
16637 
16638 // CHECK-LABEL: @test_vreinterpretq_s64_s32(
16639 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
16640 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_s32(int32x4_t a)16641 int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
16642   return vreinterpretq_s64_s32(a);
16643 }
16644 
16645 // CHECK-LABEL: @test_vreinterpretq_s64_u8(
16646 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16647 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_u8(uint8x16_t a)16648 int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
16649   return vreinterpretq_s64_u8(a);
16650 }
16651 
16652 // CHECK-LABEL: @test_vreinterpretq_s64_u16(
16653 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16654 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_u16(uint16x8_t a)16655 int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
16656   return vreinterpretq_s64_u16(a);
16657 }
16658 
16659 // CHECK-LABEL: @test_vreinterpretq_s64_u32(
16660 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
16661 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_u32(uint32x4_t a)16662 int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
16663   return vreinterpretq_s64_u32(a);
16664 }
16665 
16666 // CHECK-LABEL: @test_vreinterpretq_s64_u64(
16667 // CHECK:   ret <2 x i64> %a
test_vreinterpretq_s64_u64(uint64x2_t a)16668 int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
16669   return vreinterpretq_s64_u64(a);
16670 }
16671 
16672 // CHECK-LABEL: @test_vreinterpretq_s64_f16(
16673 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
16674 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_f16(float16x8_t a)16675 int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
16676   return vreinterpretq_s64_f16(a);
16677 }
16678 
16679 // CHECK-LABEL: @test_vreinterpretq_s64_f32(
16680 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
16681 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_f32(float32x4_t a)16682 int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
16683   return vreinterpretq_s64_f32(a);
16684 }
16685 
16686 // CHECK-LABEL: @test_vreinterpretq_s64_f64(
16687 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
16688 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_f64(float64x2_t a)16689 int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
16690   return vreinterpretq_s64_f64(a);
16691 }
16692 
16693 // CHECK-LABEL: @test_vreinterpretq_s64_p8(
16694 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16695 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_p8(poly8x16_t a)16696 int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
16697   return vreinterpretq_s64_p8(a);
16698 }
16699 
16700 // CHECK-LABEL: @test_vreinterpretq_s64_p16(
16701 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16702 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_p16(poly16x8_t a)16703 int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
16704   return vreinterpretq_s64_p16(a);
16705 }
16706 
16707 // CHECK-LABEL: @test_vreinterpretq_s64_p64(
16708 // CHECK:   ret <2 x i64> %a
test_vreinterpretq_s64_p64(poly64x2_t a)16709 int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
16710   return vreinterpretq_s64_p64(a);
16711 }
16712 
16713 // CHECK-LABEL: @test_vreinterpretq_u8_s8(
16714 // CHECK:   ret <16 x i8> %a
test_vreinterpretq_u8_s8(int8x16_t a)16715 uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
16716   return vreinterpretq_u8_s8(a);
16717 }
16718 
16719 // CHECK-LABEL: @test_vreinterpretq_u8_s16(
16720 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16721 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_s16(int16x8_t a)16722 uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
16723   return vreinterpretq_u8_s16(a);
16724 }
16725 
16726 // CHECK-LABEL: @test_vreinterpretq_u8_s32(
16727 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16728 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_s32(int32x4_t a)16729 uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
16730   return vreinterpretq_u8_s32(a);
16731 }
16732 
16733 // CHECK-LABEL: @test_vreinterpretq_u8_s64(
16734 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16735 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_s64(int64x2_t a)16736 uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
16737   return vreinterpretq_u8_s64(a);
16738 }
16739 
16740 // CHECK-LABEL: @test_vreinterpretq_u8_u16(
16741 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16742 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_u16(uint16x8_t a)16743 uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
16744   return vreinterpretq_u8_u16(a);
16745 }
16746 
16747 // CHECK-LABEL: @test_vreinterpretq_u8_u32(
16748 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
16749 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_u32(uint32x4_t a)16750 uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
16751   return vreinterpretq_u8_u32(a);
16752 }
16753 
16754 // CHECK-LABEL: @test_vreinterpretq_u8_u64(
16755 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16756 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_u64(uint64x2_t a)16757 uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
16758   return vreinterpretq_u8_u64(a);
16759 }
16760 
16761 // CHECK-LABEL: @test_vreinterpretq_u8_f16(
16762 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
16763 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_f16(float16x8_t a)16764 uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
16765   return vreinterpretq_u8_f16(a);
16766 }
16767 
16768 // CHECK-LABEL: @test_vreinterpretq_u8_f32(
16769 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
16770 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_f32(float32x4_t a)16771 uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
16772   return vreinterpretq_u8_f32(a);
16773 }
16774 
16775 // CHECK-LABEL: @test_vreinterpretq_u8_f64(
16776 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
16777 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_f64(float64x2_t a)16778 uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
16779   return vreinterpretq_u8_f64(a);
16780 }
16781 
16782 // CHECK-LABEL: @test_vreinterpretq_u8_p8(
16783 // CHECK:   ret <16 x i8> %a
test_vreinterpretq_u8_p8(poly8x16_t a)16784 uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
16785   return vreinterpretq_u8_p8(a);
16786 }
16787 
16788 // CHECK-LABEL: @test_vreinterpretq_u8_p16(
16789 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
16790 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_p16(poly16x8_t a)16791 uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
16792   return vreinterpretq_u8_p16(a);
16793 }
16794 
16795 // CHECK-LABEL: @test_vreinterpretq_u8_p64(
16796 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
16797 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_p64(poly64x2_t a)16798 uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
16799   return vreinterpretq_u8_p64(a);
16800 }
16801 
16802 // CHECK-LABEL: @test_vreinterpretq_u16_s8(
16803 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16804 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_s8(int8x16_t a)16805 uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
16806   return vreinterpretq_u16_s8(a);
16807 }
16808 
16809 // CHECK-LABEL: @test_vreinterpretq_u16_s16(
16810 // CHECK:   ret <8 x i16> %a
test_vreinterpretq_u16_s16(int16x8_t a)16811 uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
16812   return vreinterpretq_u16_s16(a);
16813 }
16814 
16815 // CHECK-LABEL: @test_vreinterpretq_u16_s32(
16816 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16817 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_s32(int32x4_t a)16818 uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
16819   return vreinterpretq_u16_s32(a);
16820 }
16821 
16822 // CHECK-LABEL: @test_vreinterpretq_u16_s64(
16823 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16824 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_s64(int64x2_t a)16825 uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
16826   return vreinterpretq_u16_s64(a);
16827 }
16828 
16829 // CHECK-LABEL: @test_vreinterpretq_u16_u8(
16830 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16831 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_u8(uint8x16_t a)16832 uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
16833   return vreinterpretq_u16_u8(a);
16834 }
16835 
16836 // CHECK-LABEL: @test_vreinterpretq_u16_u32(
16837 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
16838 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_u32(uint32x4_t a)16839 uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
16840   return vreinterpretq_u16_u32(a);
16841 }
16842 
16843 // CHECK-LABEL: @test_vreinterpretq_u16_u64(
16844 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16845 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_u64(uint64x2_t a)16846 uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
16847   return vreinterpretq_u16_u64(a);
16848 }
16849 
16850 // CHECK-LABEL: @test_vreinterpretq_u16_f16(
16851 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
16852 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_f16(float16x8_t a)16853 uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
16854   return vreinterpretq_u16_f16(a);
16855 }
16856 
16857 // CHECK-LABEL: @test_vreinterpretq_u16_f32(
16858 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
16859 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_f32(float32x4_t a)16860 uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
16861   return vreinterpretq_u16_f32(a);
16862 }
16863 
16864 // CHECK-LABEL: @test_vreinterpretq_u16_f64(
16865 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
16866 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_f64(float64x2_t a)16867 uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
16868   return vreinterpretq_u16_f64(a);
16869 }
16870 
16871 // CHECK-LABEL: @test_vreinterpretq_u16_p8(
16872 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
16873 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_p8(poly8x16_t a)16874 uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
16875   return vreinterpretq_u16_p8(a);
16876 }
16877 
16878 // CHECK-LABEL: @test_vreinterpretq_u16_p16(
16879 // CHECK:   ret <8 x i16> %a
test_vreinterpretq_u16_p16(poly16x8_t a)16880 uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
16881   return vreinterpretq_u16_p16(a);
16882 }
16883 
16884 // CHECK-LABEL: @test_vreinterpretq_u16_p64(
16885 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
16886 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_p64(poly64x2_t a)16887 uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
16888   return vreinterpretq_u16_p64(a);
16889 }
16890 
16891 // CHECK-LABEL: @test_vreinterpretq_u32_s8(
16892 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16893 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_s8(int8x16_t a)16894 uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
16895   return vreinterpretq_u32_s8(a);
16896 }
16897 
16898 // CHECK-LABEL: @test_vreinterpretq_u32_s16(
16899 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16900 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_s16(int16x8_t a)16901 uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
16902   return vreinterpretq_u32_s16(a);
16903 }
16904 
16905 // CHECK-LABEL: @test_vreinterpretq_u32_s32(
16906 // CHECK:   ret <4 x i32> %a
test_vreinterpretq_u32_s32(int32x4_t a)16907 uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
16908   return vreinterpretq_u32_s32(a);
16909 }
16910 
16911 // CHECK-LABEL: @test_vreinterpretq_u32_s64(
16912 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16913 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_s64(int64x2_t a)16914 uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
16915   return vreinterpretq_u32_s64(a);
16916 }
16917 
16918 // CHECK-LABEL: @test_vreinterpretq_u32_u8(
16919 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16920 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_u8(uint8x16_t a)16921 uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
16922   return vreinterpretq_u32_u8(a);
16923 }
16924 
16925 // CHECK-LABEL: @test_vreinterpretq_u32_u16(
16926 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16927 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_u16(uint16x8_t a)16928 uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
16929   return vreinterpretq_u32_u16(a);
16930 }
16931 
16932 // CHECK-LABEL: @test_vreinterpretq_u32_u64(
16933 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16934 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_u64(uint64x2_t a)16935 uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
16936   return vreinterpretq_u32_u64(a);
16937 }
16938 
16939 // CHECK-LABEL: @test_vreinterpretq_u32_f16(
16940 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
16941 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_f16(float16x8_t a)16942 uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
16943   return vreinterpretq_u32_f16(a);
16944 }
16945 
16946 // CHECK-LABEL: @test_vreinterpretq_u32_f32(
16947 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
16948 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_f32(float32x4_t a)16949 uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
16950   return vreinterpretq_u32_f32(a);
16951 }
16952 
16953 // CHECK-LABEL: @test_vreinterpretq_u32_f64(
16954 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
16955 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_f64(float64x2_t a)16956 uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
16957   return vreinterpretq_u32_f64(a);
16958 }
16959 
16960 // CHECK-LABEL: @test_vreinterpretq_u32_p8(
16961 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16962 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_p8(poly8x16_t a)16963 uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
16964   return vreinterpretq_u32_p8(a);
16965 }
16966 
16967 // CHECK-LABEL: @test_vreinterpretq_u32_p16(
16968 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16969 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_p16(poly16x8_t a)16970 uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
16971   return vreinterpretq_u32_p16(a);
16972 }
16973 
16974 // CHECK-LABEL: @test_vreinterpretq_u32_p64(
16975 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16976 // CHECK:   ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_p64(poly64x2_t a)16977 uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
16978   return vreinterpretq_u32_p64(a);
16979 }
16980 
16981 // CHECK-LABEL: @test_vreinterpretq_u64_s8(
16982 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16983 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_s8(int8x16_t a)16984 uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
16985   return vreinterpretq_u64_s8(a);
16986 }
16987 
16988 // CHECK-LABEL: @test_vreinterpretq_u64_s16(
16989 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16990 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_s16(int16x8_t a)16991 uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
16992   return vreinterpretq_u64_s16(a);
16993 }
16994 
16995 // CHECK-LABEL: @test_vreinterpretq_u64_s32(
16996 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
16997 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_s32(int32x4_t a)16998 uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
16999   return vreinterpretq_u64_s32(a);
17000 }
17001 
17002 // CHECK-LABEL: @test_vreinterpretq_u64_s64(
17003 // CHECK:   ret <2 x i64> %a
test_vreinterpretq_u64_s64(int64x2_t a)17004 uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
17005   return vreinterpretq_u64_s64(a);
17006 }
17007 
17008 // CHECK-LABEL: @test_vreinterpretq_u64_u8(
17009 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
17010 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_u8(uint8x16_t a)17011 uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
17012   return vreinterpretq_u64_u8(a);
17013 }
17014 
17015 // CHECK-LABEL: @test_vreinterpretq_u64_u16(
17016 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
17017 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_u16(uint16x8_t a)17018 uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
17019   return vreinterpretq_u64_u16(a);
17020 }
17021 
17022 // CHECK-LABEL: @test_vreinterpretq_u64_u32(
17023 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
17024 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_u32(uint32x4_t a)17025 uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
17026   return vreinterpretq_u64_u32(a);
17027 }
17028 
17029 // CHECK-LABEL: @test_vreinterpretq_u64_f16(
17030 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
17031 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_f16(float16x8_t a)17032 uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
17033   return vreinterpretq_u64_f16(a);
17034 }
17035 
17036 // CHECK-LABEL: @test_vreinterpretq_u64_f32(
17037 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
17038 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_f32(float32x4_t a)17039 uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
17040   return vreinterpretq_u64_f32(a);
17041 }
17042 
17043 // CHECK-LABEL: @test_vreinterpretq_u64_f64(
17044 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
17045 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_f64(float64x2_t a)17046 uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
17047   return vreinterpretq_u64_f64(a);
17048 }
17049 
17050 // CHECK-LABEL: @test_vreinterpretq_u64_p8(
17051 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
17052 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_p8(poly8x16_t a)17053 uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
17054   return vreinterpretq_u64_p8(a);
17055 }
17056 
17057 // CHECK-LABEL: @test_vreinterpretq_u64_p16(
17058 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
17059 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_p16(poly16x8_t a)17060 uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
17061   return vreinterpretq_u64_p16(a);
17062 }
17063 
17064 // CHECK-LABEL: @test_vreinterpretq_u64_p64(
17065 // CHECK:   ret <2 x i64> %a
test_vreinterpretq_u64_p64(poly64x2_t a)17066 uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
17067   return vreinterpretq_u64_p64(a);
17068 }
17069 
17070 // CHECK-LABEL: @test_vreinterpretq_f16_s8(
17071 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
17072 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s8(int8x16_t a)17073 float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
17074   return vreinterpretq_f16_s8(a);
17075 }
17076 
17077 // CHECK-LABEL: @test_vreinterpretq_f16_s16(
17078 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
17079 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s16(int16x8_t a)17080 float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
17081   return vreinterpretq_f16_s16(a);
17082 }
17083 
17084 // CHECK-LABEL: @test_vreinterpretq_f16_s32(
17085 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
17086 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s32(int32x4_t a)17087 float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
17088   return vreinterpretq_f16_s32(a);
17089 }
17090 
17091 // CHECK-LABEL: @test_vreinterpretq_f16_s64(
17092 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
17093 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_s64(int64x2_t a)17094 float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
17095   return vreinterpretq_f16_s64(a);
17096 }
17097 
17098 // CHECK-LABEL: @test_vreinterpretq_f16_u8(
17099 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
17100 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u8(uint8x16_t a)17101 float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
17102   return vreinterpretq_f16_u8(a);
17103 }
17104 
17105 // CHECK-LABEL: @test_vreinterpretq_f16_u16(
17106 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
17107 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u16(uint16x8_t a)17108 float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
17109   return vreinterpretq_f16_u16(a);
17110 }
17111 
17112 // CHECK-LABEL: @test_vreinterpretq_f16_u32(
17113 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
17114 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u32(uint32x4_t a)17115 float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
17116   return vreinterpretq_f16_u32(a);
17117 }
17118 
17119 // CHECK-LABEL: @test_vreinterpretq_f16_u64(
17120 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
17121 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_u64(uint64x2_t a)17122 float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
17123   return vreinterpretq_f16_u64(a);
17124 }
17125 
17126 // CHECK-LABEL: @test_vreinterpretq_f16_f32(
17127 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
17128 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_f32(float32x4_t a)17129 float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
17130   return vreinterpretq_f16_f32(a);
17131 }
17132 
17133 // CHECK-LABEL: @test_vreinterpretq_f16_f64(
17134 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
17135 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_f64(float64x2_t a)17136 float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
17137   return vreinterpretq_f16_f64(a);
17138 }
17139 
17140 // CHECK-LABEL: @test_vreinterpretq_f16_p8(
17141 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
17142 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p8(poly8x16_t a)17143 float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
17144   return vreinterpretq_f16_p8(a);
17145 }
17146 
17147 // CHECK-LABEL: @test_vreinterpretq_f16_p16(
17148 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
17149 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p16(poly16x8_t a)17150 float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
17151   return vreinterpretq_f16_p16(a);
17152 }
17153 
17154 // CHECK-LABEL: @test_vreinterpretq_f16_p64(
17155 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
17156 // CHECK:   ret <8 x half> [[TMP0]]
test_vreinterpretq_f16_p64(poly64x2_t a)17157 float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
17158   return vreinterpretq_f16_p64(a);
17159 }
17160 
17161 // CHECK-LABEL: @test_vreinterpretq_f32_s8(
17162 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
17163 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s8(int8x16_t a)17164 float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
17165   return vreinterpretq_f32_s8(a);
17166 }
17167 
17168 // CHECK-LABEL: @test_vreinterpretq_f32_s16(
17169 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
17170 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s16(int16x8_t a)17171 float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
17172   return vreinterpretq_f32_s16(a);
17173 }
17174 
17175 // CHECK-LABEL: @test_vreinterpretq_f32_s32(
17176 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
17177 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s32(int32x4_t a)17178 float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
17179   return vreinterpretq_f32_s32(a);
17180 }
17181 
17182 // CHECK-LABEL: @test_vreinterpretq_f32_s64(
17183 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
17184 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_s64(int64x2_t a)17185 float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
17186   return vreinterpretq_f32_s64(a);
17187 }
17188 
17189 // CHECK-LABEL: @test_vreinterpretq_f32_u8(
17190 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
17191 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u8(uint8x16_t a)17192 float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
17193   return vreinterpretq_f32_u8(a);
17194 }
17195 
17196 // CHECK-LABEL: @test_vreinterpretq_f32_u16(
17197 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
17198 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u16(uint16x8_t a)17199 float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
17200   return vreinterpretq_f32_u16(a);
17201 }
17202 
17203 // CHECK-LABEL: @test_vreinterpretq_f32_u32(
17204 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
17205 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u32(uint32x4_t a)17206 float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
17207   return vreinterpretq_f32_u32(a);
17208 }
17209 
17210 // CHECK-LABEL: @test_vreinterpretq_f32_u64(
17211 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
17212 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_u64(uint64x2_t a)17213 float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
17214   return vreinterpretq_f32_u64(a);
17215 }
17216 
17217 // CHECK-LABEL: @test_vreinterpretq_f32_f16(
17218 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
17219 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_f16(float16x8_t a)17220 float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
17221   return vreinterpretq_f32_f16(a);
17222 }
17223 
17224 // CHECK-LABEL: @test_vreinterpretq_f32_f64(
17225 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
17226 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_f64(float64x2_t a)17227 float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
17228   return vreinterpretq_f32_f64(a);
17229 }
17230 
17231 // CHECK-LABEL: @test_vreinterpretq_f32_p8(
17232 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
17233 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p8(poly8x16_t a)17234 float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
17235   return vreinterpretq_f32_p8(a);
17236 }
17237 
17238 // CHECK-LABEL: @test_vreinterpretq_f32_p16(
17239 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
17240 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p16(poly16x8_t a)17241 float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
17242   return vreinterpretq_f32_p16(a);
17243 }
17244 
17245 // CHECK-LABEL: @test_vreinterpretq_f32_p64(
17246 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
17247 // CHECK:   ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p64(poly64x2_t a)17248 float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
17249   return vreinterpretq_f32_p64(a);
17250 }
17251 
17252 // CHECK-LABEL: @test_vreinterpretq_f64_s8(
17253 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
17254 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s8(int8x16_t a)17255 float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
17256   return vreinterpretq_f64_s8(a);
17257 }
17258 
17259 // CHECK-LABEL: @test_vreinterpretq_f64_s16(
17260 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
17261 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s16(int16x8_t a)17262 float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
17263   return vreinterpretq_f64_s16(a);
17264 }
17265 
17266 // CHECK-LABEL: @test_vreinterpretq_f64_s32(
17267 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
17268 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s32(int32x4_t a)17269 float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
17270   return vreinterpretq_f64_s32(a);
17271 }
17272 
17273 // CHECK-LABEL: @test_vreinterpretq_f64_s64(
17274 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
17275 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_s64(int64x2_t a)17276 float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
17277   return vreinterpretq_f64_s64(a);
17278 }
17279 
17280 // CHECK-LABEL: @test_vreinterpretq_f64_u8(
17281 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
17282 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u8(uint8x16_t a)17283 float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
17284   return vreinterpretq_f64_u8(a);
17285 }
17286 
17287 // CHECK-LABEL: @test_vreinterpretq_f64_u16(
17288 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
17289 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u16(uint16x8_t a)17290 float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
17291   return vreinterpretq_f64_u16(a);
17292 }
17293 
17294 // CHECK-LABEL: @test_vreinterpretq_f64_u32(
17295 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
17296 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u32(uint32x4_t a)17297 float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
17298   return vreinterpretq_f64_u32(a);
17299 }
17300 
17301 // CHECK-LABEL: @test_vreinterpretq_f64_u64(
17302 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
17303 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_u64(uint64x2_t a)17304 float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
17305   return vreinterpretq_f64_u64(a);
17306 }
17307 
17308 // CHECK-LABEL: @test_vreinterpretq_f64_f16(
17309 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
17310 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_f16(float16x8_t a)17311 float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
17312   return vreinterpretq_f64_f16(a);
17313 }
17314 
17315 // CHECK-LABEL: @test_vreinterpretq_f64_f32(
17316 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
17317 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_f32(float32x4_t a)17318 float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
17319   return vreinterpretq_f64_f32(a);
17320 }
17321 
17322 // CHECK-LABEL: @test_vreinterpretq_f64_p8(
17323 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
17324 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_p8(poly8x16_t a)17325 float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
17326   return vreinterpretq_f64_p8(a);
17327 }
17328 
17329 // CHECK-LABEL: @test_vreinterpretq_f64_p16(
17330 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
17331 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_p16(poly16x8_t a)17332 float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
17333   return vreinterpretq_f64_p16(a);
17334 }
17335 
17336 // CHECK-LABEL: @test_vreinterpretq_f64_p64(
17337 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
17338 // CHECK:   ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_p64(poly64x2_t a)17339 float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
17340   return vreinterpretq_f64_p64(a);
17341 }
17342 
17343 // CHECK-LABEL: @test_vreinterpretq_p8_s8(
17344 // CHECK:   ret <16 x i8> %a
test_vreinterpretq_p8_s8(int8x16_t a)17345 poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
17346   return vreinterpretq_p8_s8(a);
17347 }
17348 
17349 // CHECK-LABEL: @test_vreinterpretq_p8_s16(
17350 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
17351 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_s16(int16x8_t a)17352 poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
17353   return vreinterpretq_p8_s16(a);
17354 }
17355 
17356 // CHECK-LABEL: @test_vreinterpretq_p8_s32(
17357 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
17358 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_s32(int32x4_t a)17359 poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
17360   return vreinterpretq_p8_s32(a);
17361 }
17362 
17363 // CHECK-LABEL: @test_vreinterpretq_p8_s64(
17364 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17365 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_s64(int64x2_t a)17366 poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
17367   return vreinterpretq_p8_s64(a);
17368 }
17369 
17370 // CHECK-LABEL: @test_vreinterpretq_p8_u8(
17371 // CHECK:   ret <16 x i8> %a
test_vreinterpretq_p8_u8(uint8x16_t a)17372 poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
17373   return vreinterpretq_p8_u8(a);
17374 }
17375 
17376 // CHECK-LABEL: @test_vreinterpretq_p8_u16(
17377 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
17378 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_u16(uint16x8_t a)17379 poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
17380   return vreinterpretq_p8_u16(a);
17381 }
17382 
17383 // CHECK-LABEL: @test_vreinterpretq_p8_u32(
17384 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
17385 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_u32(uint32x4_t a)17386 poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
17387   return vreinterpretq_p8_u32(a);
17388 }
17389 
17390 // CHECK-LABEL: @test_vreinterpretq_p8_u64(
17391 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17392 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_u64(uint64x2_t a)17393 poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
17394   return vreinterpretq_p8_u64(a);
17395 }
17396 
17397 // CHECK-LABEL: @test_vreinterpretq_p8_f16(
17398 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
17399 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_f16(float16x8_t a)17400 poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
17401   return vreinterpretq_p8_f16(a);
17402 }
17403 
17404 // CHECK-LABEL: @test_vreinterpretq_p8_f32(
17405 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
17406 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_f32(float32x4_t a)17407 poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
17408   return vreinterpretq_p8_f32(a);
17409 }
17410 
17411 // CHECK-LABEL: @test_vreinterpretq_p8_f64(
17412 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
17413 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_f64(float64x2_t a)17414 poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
17415   return vreinterpretq_p8_f64(a);
17416 }
17417 
17418 // CHECK-LABEL: @test_vreinterpretq_p8_p16(
17419 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
17420 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_p16(poly16x8_t a)17421 poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
17422   return vreinterpretq_p8_p16(a);
17423 }
17424 
17425 // CHECK-LABEL: @test_vreinterpretq_p8_p64(
17426 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17427 // CHECK:   ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_p64(poly64x2_t a)17428 poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
17429   return vreinterpretq_p8_p64(a);
17430 }
17431 
17432 // CHECK-LABEL: @test_vreinterpretq_p16_s8(
17433 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
17434 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s8(int8x16_t a)17435 poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
17436   return vreinterpretq_p16_s8(a);
17437 }
17438 
17439 // CHECK-LABEL: @test_vreinterpretq_p16_s16(
17440 // CHECK:   ret <8 x i16> %a
test_vreinterpretq_p16_s16(int16x8_t a)17441 poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
17442   return vreinterpretq_p16_s16(a);
17443 }
17444 
17445 // CHECK-LABEL: @test_vreinterpretq_p16_s32(
17446 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
17447 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s32(int32x4_t a)17448 poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
17449   return vreinterpretq_p16_s32(a);
17450 }
17451 
17452 // CHECK-LABEL: @test_vreinterpretq_p16_s64(
17453 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
17454 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s64(int64x2_t a)17455 poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
17456   return vreinterpretq_p16_s64(a);
17457 }
17458 
17459 // CHECK-LABEL: @test_vreinterpretq_p16_u8(
17460 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
17461 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u8(uint8x16_t a)17462 poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
17463   return vreinterpretq_p16_u8(a);
17464 }
17465 
17466 // CHECK-LABEL: @test_vreinterpretq_p16_u16(
17467 // CHECK:   ret <8 x i16> %a
test_vreinterpretq_p16_u16(uint16x8_t a)17468 poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
17469   return vreinterpretq_p16_u16(a);
17470 }
17471 
17472 // CHECK-LABEL: @test_vreinterpretq_p16_u32(
17473 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
17474 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u32(uint32x4_t a)17475 poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
17476   return vreinterpretq_p16_u32(a);
17477 }
17478 
17479 // CHECK-LABEL: @test_vreinterpretq_p16_u64(
17480 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
17481 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u64(uint64x2_t a)17482 poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
17483   return vreinterpretq_p16_u64(a);
17484 }
17485 
17486 // CHECK-LABEL: @test_vreinterpretq_p16_f16(
17487 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
17488 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f16(float16x8_t a)17489 poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
17490   return vreinterpretq_p16_f16(a);
17491 }
17492 
17493 // CHECK-LABEL: @test_vreinterpretq_p16_f32(
17494 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
17495 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f32(float32x4_t a)17496 poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
17497   return vreinterpretq_p16_f32(a);
17498 }
17499 
17500 // CHECK-LABEL: @test_vreinterpretq_p16_f64(
17501 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
17502 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f64(float64x2_t a)17503 poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
17504   return vreinterpretq_p16_f64(a);
17505 }
17506 
17507 // CHECK-LABEL: @test_vreinterpretq_p16_p8(
17508 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
17509 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_p8(poly8x16_t a)17510 poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
17511   return vreinterpretq_p16_p8(a);
17512 }
17513 
17514 // CHECK-LABEL: @test_vreinterpretq_p16_p64(
17515 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
17516 // CHECK:   ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_p64(poly64x2_t a)17517 poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
17518   return vreinterpretq_p16_p64(a);
17519 }
17520 
17521 // CHECK-LABEL: @test_vreinterpretq_p64_s8(
17522 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
17523 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_s8(int8x16_t a)17524 poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
17525   return vreinterpretq_p64_s8(a);
17526 }
17527 
17528 // CHECK-LABEL: @test_vreinterpretq_p64_s16(
17529 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
17530 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_s16(int16x8_t a)17531 poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
17532   return vreinterpretq_p64_s16(a);
17533 }
17534 
17535 // CHECK-LABEL: @test_vreinterpretq_p64_s32(
17536 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
17537 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_s32(int32x4_t a)17538 poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
17539   return vreinterpretq_p64_s32(a);
17540 }
17541 
17542 // CHECK-LABEL: @test_vreinterpretq_p64_s64(
17543 // CHECK:   ret <2 x i64> %a
test_vreinterpretq_p64_s64(int64x2_t a)17544 poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
17545   return vreinterpretq_p64_s64(a);
17546 }
17547 
17548 // CHECK-LABEL: @test_vreinterpretq_p64_u8(
17549 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
17550 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_u8(uint8x16_t a)17551 poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
17552   return vreinterpretq_p64_u8(a);
17553 }
17554 
17555 // CHECK-LABEL: @test_vreinterpretq_p64_u16(
17556 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
17557 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_u16(uint16x8_t a)17558 poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
17559   return vreinterpretq_p64_u16(a);
17560 }
17561 
17562 // CHECK-LABEL: @test_vreinterpretq_p64_u32(
17563 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
17564 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_u32(uint32x4_t a)17565 poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
17566   return vreinterpretq_p64_u32(a);
17567 }
17568 
17569 // CHECK-LABEL: @test_vreinterpretq_p64_u64(
17570 // CHECK:   ret <2 x i64> %a
test_vreinterpretq_p64_u64(uint64x2_t a)17571 poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
17572   return vreinterpretq_p64_u64(a);
17573 }
17574 
17575 // CHECK-LABEL: @test_vreinterpretq_p64_f16(
17576 // CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
17577 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_f16(float16x8_t a)17578 poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
17579   return vreinterpretq_p64_f16(a);
17580 }
17581 
17582 // CHECK-LABEL: @test_vreinterpretq_p64_f32(
17583 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
17584 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_f32(float32x4_t a)17585 poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
17586   return vreinterpretq_p64_f32(a);
17587 }
17588 
17589 // CHECK-LABEL: @test_vreinterpretq_p64_f64(
17590 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
17591 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_f64(float64x2_t a)17592 poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
17593   return vreinterpretq_p64_f64(a);
17594 }
17595 
17596 // CHECK-LABEL: @test_vreinterpretq_p64_p8(
17597 // CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
17598 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_p8(poly8x16_t a)17599 poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
17600   return vreinterpretq_p64_p8(a);
17601 }
17602 
17603 // CHECK-LABEL: @test_vreinterpretq_p64_p16(
17604 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
17605 // CHECK:   ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_p16(poly16x8_t a)17606 poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
17607   return vreinterpretq_p64_p16(a);
17608 }
17609 
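// The vreinterpretq_* intrinsics above are pure bit-pattern casts: when the
// source and destination map to the same LLVM vector type they lower to a
// plain return of the argument, otherwise to a single bitcast with no data
// movement. As an illustrative sketch only (a hypothetical helper, not one
// of the checked tests), reinterpretation lets code inspect float lanes as
// their raw IEEE-754 bit patterns:
static inline uint32x4_t sketch_float_bits(float32x4_t v) {
  // Reinterpret four floats as their raw 32-bit representations.
  return vreinterpretq_u32_f32(v);
}
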
// CHECK-LABEL: @test_vabds_f32(
// CHECK:   [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK:   ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK:   [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK:   ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}

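// vabds_f32/vabdd_f64 lower to the scalar @llvm.aarch64.sisd.fabd intrinsics,
// i.e. a single FABD instruction computing the absolute difference |a - b|.
// Illustrative sketch only (a hypothetical helper, not a checked test):
static inline int sketch_nearly_equal(float32_t a, float32_t b, float32_t tol) {
  // Compare |a - b| against a tolerance using one absolute-difference op.
  return vabds_f32(a, b) <= tol;
}
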
// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK: entry:
// CHECK-NEXT:  [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT:  ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT:  ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT:  ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT:  ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT:  ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT:  ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}

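// vuqadd*_s* ("signed saturating accumulate of unsigned") adds an unsigned
// vector into a signed accumulator, saturating at the signed maximum, and
// lowers to @llvm.aarch64.neon.suqadd.*. A minimal sketch of the saturation
// behaviour (a hypothetical helper, not part of the checked tests):
static inline int8x8_t sketch_suqadd_saturates(void) {
  int8x8_t acc = vdup_n_s8(100);  // signed accumulator near INT8_MAX
  uint8x8_t inc = vdup_n_u8(200); // unsigned addend larger than the headroom
  return vuqadd_s8(acc, inc);     // every lane clamps to 127
}
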
// CHECK-LABEL: @test_vsqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: @test_vsqadd_u8(
// CHECK:   [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK:   [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vsqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}

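// vsqadd*_u* is the mirror image: "unsigned saturating accumulate of signed",
// lowering to @llvm.aarch64.neon.usqadd.*. A negative addend can pull the
// unsigned accumulator down, saturating at 0. Hedged sketch (a hypothetical
// helper, not part of the checked tests):
static inline uint16x4_t sketch_usqadd_floor(void) {
  uint16x4_t acc = vdup_n_u16(5);   // small unsigned accumulator
  int16x4_t dec = vdup_n_s16(-100); // signed addend below -acc
  return vsqadd_u16(acc, dec);      // every lane clamps to 0
}
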
// CHECK-LABEL: @test_vabs_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
// CHECK:   ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: @test_vqabs_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQABS_V1_I]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: @test_vqneg_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQNEG_V1_I]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: @test_vneg_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}

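// Illustrative sketch (helper name is ours; INT64_MIN comes from <stdint.h>,
// which arm_neon.h pulls in): the saturating forms clamp where the plain
// forms wrap, e.g. vabs_s64 maps INT64_MIN back to INT64_MIN, while
// vqabs_s64 saturates it to INT64_MAX (and vqneg_s64 behaves likewise).
int64_t sqabs_demo(void) {
  int64x1_t v = vdup_n_s64(INT64_MIN);
  return vget_lane_s64(vqabs_s64(v), 0); // INT64_MAX
}
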
// CHECK-LABEL: @test_vaddv_f32(
// CHECK:   [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f32(
// CHECK:   [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
// CHECK:   ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f64(
// CHECK:   [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: @test_vmaxv_f32(
// CHECK:   [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: @test_vmaxvq_f64(
// CHECK:   [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: @test_vminv_f32(
// CHECK:   [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: @test_vminvq_f64(
// CHECK:   [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmvq_f64(
// CHECK:   [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmv_f32(
// CHECK:   [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: @test_vminnmvq_f64(
// CHECK:   [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK:   ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: @test_vminnmv_f32(
// CHECK:   [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK:   ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}

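// Illustrative sketch (helper name is ours): the "NM" reductions follow
// IEEE 754 maxNum/minNum semantics, so a quiet NaN lane is ignored when the
// other lane is a number, whereas vmaxv_f32/vminv_f32 would propagate it.
float32_t maxnmv_demo(void) {
  float32x2_t v = vset_lane_f32(__builtin_nanf(""), vdup_n_f32(1.0f), 1);
  return vmaxnmv_f32(v); // 1.0f; vmaxv_f32(v) would be NaN
}
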
// CHECK-LABEL: @test_vpaddq_s64(
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: @test_vpaddq_u64(
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: @test_vpaddd_u64(
// CHECK:   [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: @test_vaddvq_s64(
// CHECK:   [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: @test_vaddvq_u64(
// CHECK:   [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK:   ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}

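// Illustrative note (helper name is ours): for a two-lane vector the scalar
// pairwise add is exactly the across-lanes sum, which is why vpaddd_u64
// lowers to the same uaddv intrinsic as vaddvq_u64.
uint64_t paddd_demo(uint64x2_t v) {
  return vpaddd_u64(v); // == vgetq_lane_u64(v, 0) + vgetq_lane_u64(v, 1)
}
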
// CHECK-LABEL: @test_vadd_f64(
// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK:   ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}

// CHECK-LABEL: @test_vmul_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK:   ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}

// CHECK-LABEL: @test_vdiv_f64(
// CHECK:   [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK:   ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}

// CHECK-LABEL: @test_vmla_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK:   ret <1 x double> [[ADD_I]]
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}

// CHECK-LABEL: @test_vmls_f64(
// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}

// CHECK-LABEL: @test_vfma_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
// CHECK:   ret <1 x double> [[TMP3]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}

// CHECK-LABEL: @test_vfms_f64(
// CHECK:   [[SUB_I:%.*]] = fneg <1 x double> %b
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
// CHECK:   ret <1 x double> [[TMP3]]
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}

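// Illustrative sketch (helper name is ours): as the IR above shows,
// vmla_f64 lowers to separate fmul/fadd and rounds twice, while vfma_f64
// lowers to llvm.fma and rounds once, so the two can differ in the last ULP.
float64x1_t fused_vs_unfused_demo(float64x1_t a, float64x1_t b,
                                  float64x1_t c) {
  float64x1_t unfused = vmla_f64(a, b, c); // round(a + round(b*c))
  float64x1_t fused = vfma_f64(a, b, c);   // round(a + b*c), single rounding
  return vsub_f64(fused, unfused);         // often zero, but not always
}
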
// CHECK-LABEL: @test_vsub_f64(
// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, %b
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}

// CHECK-LABEL: @test_vabd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VABD2_I]]
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}

// CHECK-LABEL: @test_vmax_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMAX2_I]]
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}

// CHECK-LABEL: @test_vmin_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMIN2_I]]
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}

// CHECK-LABEL: @test_vmaxnm_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMAXNM2_I]]
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VMINNM2_I]]
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}

// CHECK-LABEL: @test_vabs_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VABS1_I]]
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}

// CHECK-LABEL: @test_vneg_f64(
// CHECK:   [[SUB_I:%.*]] = fneg <1 x double> %a
// CHECK:   ret <1 x double> [[SUB_I]]
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}

// CHECK-LABEL: @test_vcvt_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP1]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP1]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTN1_I]]
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTN1_I]]
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTP1_I]]
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTP1_I]]
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTM1_I]]
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTM1_I]]
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}

// CHECK-LABEL: @test_vcvta_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTA1_I]]
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}

// CHECK-LABEL: @test_vcvta_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
// CHECK:   ret <1 x i64> [[VCVTA1_I]]
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}

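// Illustrative sketch (helper name is ours) of the rounding-mode suffixes
// exercised above: vcvtn_* rounds to nearest with ties to even, vcvta_* to
// nearest with ties away from zero, vcvtp_* toward +infinity, vcvtm_*
// toward -infinity, and plain vcvt_* truncates toward zero (fptosi/fptoui).
int64_t cvt_rounding_demo(void) {
  float64x1_t half = vdup_n_f64(0.5);
  int64_t n = vget_lane_s64(vcvtn_s64_f64(half), 0); // 0, tie goes to even
  int64_t a = vget_lane_s64(vcvta_s64_f64(half), 0); // 1, tie goes away
  return a - n; // 1
}
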
// CHECK-LABEL: @test_vcvt_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}

// CHECK-LABEL: @test_vcvt_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}

// CHECK-LABEL: @test_vcvt_n_s64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x i64> [[VCVT_N1]]
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_u64_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x i64> [[VCVT_N1]]
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK:   ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}

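// Illustrative sketch (helper name is ours): the _n_ conversions treat the
// integer side as a fixed-point value with the given number of fractional
// bits, i.e. they scale by a power of two; the immediate must be a constant
// in the range 1..64 for 64-bit elements, as the tests above exercise at
// the upper bound.
float64x1_t fixed_point_demo(void) {
  int64x1_t raw = vdup_n_s64(3);
  return vcvt_n_f64_s64(raw, 1); // 3 / 2^1 == 1.5
}
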
// CHECK-LABEL: @test_vrndn_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}

// CHECK-LABEL: @test_vrnda_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDA1_I]]
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}

// CHECK-LABEL: @test_vrndp_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDP1_I]]
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}

// CHECK-LABEL: @test_vrndm_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDM1_I]]
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}

// CHECK-LABEL: @test_vrndx_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDX1_I]]
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}

// CHECK-LABEL: @test_vrnd_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDZ1_I]]
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}

// CHECK-LABEL: @test_vrndi_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRNDI1_I]]
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}

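// Illustrative sketch (helper name is ours) of the vrnd* family as lowered
// above: vrndn rounds to nearest (ties to even), vrnda to nearest (ties
// away from zero, llvm.round), vrndp toward +infinity (ceil), vrndm toward
// -infinity (floor), vrnd toward zero (trunc), vrndx uses the current mode
// and may raise inexact (rint), and vrndi uses the current mode without
// raising it (nearbyint).
float64x1_t rounding_family_demo(void) {
  float64x1_t v = vdup_n_f64(2.5);
  return vrnda_f64(v); // 3.0; vrndn_f64(v) would give 2.0
}
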
// CHECK-LABEL: @test_vrsqrte_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRSQRTE_V1_I]]
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}

// CHECK-LABEL: @test_vrecpe_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VRECPE_V1_I]]
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}

// CHECK-LABEL: @test_vsqrt_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
// CHECK:   ret <1 x double> [[VSQRT_I]]
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}

// CHECK-LABEL: @test_vrecps_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   ret <1 x double> [[VRECPS_V2_I]]
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrts_f64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK:   ret <1 x double> [[VRSQRTS_V2_I]]
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}

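// Illustrative sketch (helper name is ours): vrecpe_f64 yields a rough
// estimate of 1/a (roughly 8 bits of precision), and each vrecps_f64 step,
// which computes 2 - a*x, performs one Newton-Raphson refinement
// x' = x * (2 - a*x), roughly doubling the number of accurate bits.
float64x1_t refined_recip_demo(float64x1_t a) {
  float64x1_t x = vrecpe_f64(a);
  x = vmul_f64(x, vrecps_f64(a, x)); // first refinement step
  x = vmul_f64(x, vrecps_f64(a, x)); // second refinement step
  return x;
}
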
// CHECK-LABEL: @test_vminv_s32(
// CHECK:   [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}

// CHECK-LABEL: @test_vminv_u32(
// CHECK:   [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}

// CHECK-LABEL: @test_vmaxv_s32(
// CHECK:   [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}

// CHECK-LABEL: @test_vmaxv_u32(
// CHECK:   [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}

// CHECK-LABEL: @test_vaddv_s32(
// CHECK:   [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}

// CHECK-LABEL: @test_vaddv_u32(
// CHECK:   [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
// CHECK:   ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}

// CHECK-LABEL: @test_vaddlv_s32(
// CHECK:   [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
// CHECK:   ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}

// CHECK-LABEL: @test_vaddlv_u32(
// CHECK:   [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
// CHECK:   ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
}

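// Illustrative sketch (helper name is ours): the "l" (long) reductions
// widen before accumulating, so a sum that would wrap vaddv_u32's 32-bit
// result fits in vaddlv_u32's 64-bit result.
uint64_t addlv_demo(void) {
  uint32x2_t v = vdup_n_u32(0xFFFFFFFFu);
  return vaddlv_u32(v); // 0x1FFFFFFFE, no 32-bit wraparound
}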