// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \
// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

11 // CHECK-LABEL: @test_vadd_s8(
12 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
13 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_s8(int8x8_t v1,int8x8_t v2)14 int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
15 return vadd_s8(v1, v2);
16 }
17
18 // CHECK-LABEL: @test_vadd_s16(
19 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
20 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_s16(int16x4_t v1,int16x4_t v2)21 int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
22 return vadd_s16(v1, v2);
23 }
24
25 // CHECK-LABEL: @test_vadd_s32(
26 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
27 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_s32(int32x2_t v1,int32x2_t v2)28 int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
29 return vadd_s32(v1, v2);
30 }
31
32 // CHECK-LABEL: @test_vadd_s64(
33 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
34 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_s64(int64x1_t v1,int64x1_t v2)35 int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
36 return vadd_s64(v1, v2);
37 }
38
39 // CHECK-LABEL: @test_vadd_f32(
40 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
41 // CHECK: ret <2 x float> [[ADD_I]]
test_vadd_f32(float32x2_t v1,float32x2_t v2)42 float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
43 return vadd_f32(v1, v2);
44 }
45
46 // CHECK-LABEL: @test_vadd_u8(
47 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
48 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_u8(uint8x8_t v1,uint8x8_t v2)49 uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
50 return vadd_u8(v1, v2);
51 }
52
53 // CHECK-LABEL: @test_vadd_u16(
54 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
55 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_u16(uint16x4_t v1,uint16x4_t v2)56 uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
57 return vadd_u16(v1, v2);
58 }
59
60 // CHECK-LABEL: @test_vadd_u32(
61 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
62 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_u32(uint32x2_t v1,uint32x2_t v2)63 uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
64 return vadd_u32(v1, v2);
65 }
66
67 // CHECK-LABEL: @test_vadd_u64(
68 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
69 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_u64(uint64x1_t v1,uint64x1_t v2)70 uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
71 return vadd_u64(v1, v2);
72 }
73
74 // CHECK-LABEL: @test_vaddq_s8(
75 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
76 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_s8(int8x16_t v1,int8x16_t v2)77 int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
78 return vaddq_s8(v1, v2);
79 }
80
81 // CHECK-LABEL: @test_vaddq_s16(
82 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
83 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_s16(int16x8_t v1,int16x8_t v2)84 int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
85 return vaddq_s16(v1, v2);
86 }
87
88 // CHECK-LABEL: @test_vaddq_s32(
89 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
90 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_s32(int32x4_t v1,int32x4_t v2)91 int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
92 return vaddq_s32(v1, v2);
93 }
94
95 // CHECK-LABEL: @test_vaddq_s64(
96 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
97 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_s64(int64x2_t v1,int64x2_t v2)98 int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
99 return vaddq_s64(v1, v2);
100 }
101
102 // CHECK-LABEL: @test_vaddq_f32(
103 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
104 // CHECK: ret <4 x float> [[ADD_I]]
test_vaddq_f32(float32x4_t v1,float32x4_t v2)105 float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
106 return vaddq_f32(v1, v2);
107 }
108
109 // CHECK-LABEL: @test_vaddq_f64(
110 // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
111 // CHECK: ret <2 x double> [[ADD_I]]
test_vaddq_f64(float64x2_t v1,float64x2_t v2)112 float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
113 return vaddq_f64(v1, v2);
114 }
115
116 // CHECK-LABEL: @test_vaddq_u8(
117 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
118 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_u8(uint8x16_t v1,uint8x16_t v2)119 uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
120 return vaddq_u8(v1, v2);
121 }
122
123 // CHECK-LABEL: @test_vaddq_u16(
124 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
125 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_u16(uint16x8_t v1,uint16x8_t v2)126 uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
127 return vaddq_u16(v1, v2);
128 }
129
130 // CHECK-LABEL: @test_vaddq_u32(
131 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
132 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_u32(uint32x4_t v1,uint32x4_t v2)133 uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
134 return vaddq_u32(v1, v2);
135 }
136
137 // CHECK-LABEL: @test_vaddq_u64(
138 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
139 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_u64(uint64x2_t v1,uint64x2_t v2)140 uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
141 return vaddq_u64(v1, v2);
142 }
143
144 // CHECK-LABEL: @test_vsub_s8(
145 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
146 // CHECK: ret <8 x i8> [[SUB_I]]
test_vsub_s8(int8x8_t v1,int8x8_t v2)147 int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
148 return vsub_s8(v1, v2);
149 }
150
151 // CHECK-LABEL: @test_vsub_s16(
152 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
153 // CHECK: ret <4 x i16> [[SUB_I]]
test_vsub_s16(int16x4_t v1,int16x4_t v2)154 int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
155 return vsub_s16(v1, v2);
156 }
157
158 // CHECK-LABEL: @test_vsub_s32(
159 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
160 // CHECK: ret <2 x i32> [[SUB_I]]
test_vsub_s32(int32x2_t v1,int32x2_t v2)161 int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
162 return vsub_s32(v1, v2);
163 }
164
165 // CHECK-LABEL: @test_vsub_s64(
166 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
167 // CHECK: ret <1 x i64> [[SUB_I]]
test_vsub_s64(int64x1_t v1,int64x1_t v2)168 int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
169 return vsub_s64(v1, v2);
170 }
171
172 // CHECK-LABEL: @test_vsub_f32(
173 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
174 // CHECK: ret <2 x float> [[SUB_I]]
test_vsub_f32(float32x2_t v1,float32x2_t v2)175 float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
176 return vsub_f32(v1, v2);
177 }
178
179 // CHECK-LABEL: @test_vsub_u8(
180 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
181 // CHECK: ret <8 x i8> [[SUB_I]]
test_vsub_u8(uint8x8_t v1,uint8x8_t v2)182 uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
183 return vsub_u8(v1, v2);
184 }
185
186 // CHECK-LABEL: @test_vsub_u16(
187 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
188 // CHECK: ret <4 x i16> [[SUB_I]]
test_vsub_u16(uint16x4_t v1,uint16x4_t v2)189 uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
190 return vsub_u16(v1, v2);
191 }
192
193 // CHECK-LABEL: @test_vsub_u32(
194 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
195 // CHECK: ret <2 x i32> [[SUB_I]]
test_vsub_u32(uint32x2_t v1,uint32x2_t v2)196 uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
197 return vsub_u32(v1, v2);
198 }
199
200 // CHECK-LABEL: @test_vsub_u64(
201 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
202 // CHECK: ret <1 x i64> [[SUB_I]]
test_vsub_u64(uint64x1_t v1,uint64x1_t v2)203 uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
204 return vsub_u64(v1, v2);
205 }
206
207 // CHECK-LABEL: @test_vsubq_s8(
208 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
209 // CHECK: ret <16 x i8> [[SUB_I]]
test_vsubq_s8(int8x16_t v1,int8x16_t v2)210 int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
211 return vsubq_s8(v1, v2);
212 }
213
214 // CHECK-LABEL: @test_vsubq_s16(
215 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
216 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubq_s16(int16x8_t v1,int16x8_t v2)217 int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
218 return vsubq_s16(v1, v2);
219 }
220
221 // CHECK-LABEL: @test_vsubq_s32(
222 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
223 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubq_s32(int32x4_t v1,int32x4_t v2)224 int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
225 return vsubq_s32(v1, v2);
226 }
227
228 // CHECK-LABEL: @test_vsubq_s64(
229 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
230 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubq_s64(int64x2_t v1,int64x2_t v2)231 int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
232 return vsubq_s64(v1, v2);
233 }
234
235 // CHECK-LABEL: @test_vsubq_f32(
236 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
237 // CHECK: ret <4 x float> [[SUB_I]]
test_vsubq_f32(float32x4_t v1,float32x4_t v2)238 float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
239 return vsubq_f32(v1, v2);
240 }
241
242 // CHECK-LABEL: @test_vsubq_f64(
243 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
244 // CHECK: ret <2 x double> [[SUB_I]]
test_vsubq_f64(float64x2_t v1,float64x2_t v2)245 float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
246 return vsubq_f64(v1, v2);
247 }
248
249 // CHECK-LABEL: @test_vsubq_u8(
250 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
251 // CHECK: ret <16 x i8> [[SUB_I]]
test_vsubq_u8(uint8x16_t v1,uint8x16_t v2)252 uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
253 return vsubq_u8(v1, v2);
254 }
255
256 // CHECK-LABEL: @test_vsubq_u16(
257 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
258 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubq_u16(uint16x8_t v1,uint16x8_t v2)259 uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
260 return vsubq_u16(v1, v2);
261 }
262
263 // CHECK-LABEL: @test_vsubq_u32(
264 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
265 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubq_u32(uint32x4_t v1,uint32x4_t v2)266 uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
267 return vsubq_u32(v1, v2);
268 }
269
270 // CHECK-LABEL: @test_vsubq_u64(
271 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
272 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubq_u64(uint64x2_t v1,uint64x2_t v2)273 uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
274 return vsubq_u64(v1, v2);
275 }
276
277 // CHECK-LABEL: @test_vmul_s8(
278 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
279 // CHECK: ret <8 x i8> [[MUL_I]]
test_vmul_s8(int8x8_t v1,int8x8_t v2)280 int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
281 return vmul_s8(v1, v2);
282 }
283
284 // CHECK-LABEL: @test_vmul_s16(
285 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
286 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_s16(int16x4_t v1,int16x4_t v2)287 int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
288 return vmul_s16(v1, v2);
289 }
290
291 // CHECK-LABEL: @test_vmul_s32(
292 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
293 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_s32(int32x2_t v1,int32x2_t v2)294 int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
295 return vmul_s32(v1, v2);
296 }
297
298 // CHECK-LABEL: @test_vmul_f32(
299 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
300 // CHECK: ret <2 x float> [[MUL_I]]
test_vmul_f32(float32x2_t v1,float32x2_t v2)301 float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
302 return vmul_f32(v1, v2);
303 }
304
305 // CHECK-LABEL: @test_vmul_u8(
306 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
307 // CHECK: ret <8 x i8> [[MUL_I]]
test_vmul_u8(uint8x8_t v1,uint8x8_t v2)308 uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
309 return vmul_u8(v1, v2);
310 }
311
312 // CHECK-LABEL: @test_vmul_u16(
313 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
314 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_u16(uint16x4_t v1,uint16x4_t v2)315 uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
316 return vmul_u16(v1, v2);
317 }
318
319 // CHECK-LABEL: @test_vmul_u32(
320 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
321 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_u32(uint32x2_t v1,uint32x2_t v2)322 uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
323 return vmul_u32(v1, v2);
324 }
325
326 // CHECK-LABEL: @test_vmulq_s8(
327 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
328 // CHECK: ret <16 x i8> [[MUL_I]]
test_vmulq_s8(int8x16_t v1,int8x16_t v2)329 int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
330 return vmulq_s8(v1, v2);
331 }
332
333 // CHECK-LABEL: @test_vmulq_s16(
334 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
335 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_s16(int16x8_t v1,int16x8_t v2)336 int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
337 return vmulq_s16(v1, v2);
338 }
339
340 // CHECK-LABEL: @test_vmulq_s32(
341 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
342 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_s32(int32x4_t v1,int32x4_t v2)343 int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
344 return vmulq_s32(v1, v2);
345 }
346
347 // CHECK-LABEL: @test_vmulq_u8(
348 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
349 // CHECK: ret <16 x i8> [[MUL_I]]
test_vmulq_u8(uint8x16_t v1,uint8x16_t v2)350 uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
351 return vmulq_u8(v1, v2);
352 }
353
354 // CHECK-LABEL: @test_vmulq_u16(
355 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
356 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_u16(uint16x8_t v1,uint16x8_t v2)357 uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
358 return vmulq_u16(v1, v2);
359 }
360
361 // CHECK-LABEL: @test_vmulq_u32(
362 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
363 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_u32(uint32x4_t v1,uint32x4_t v2)364 uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
365 return vmulq_u32(v1, v2);
366 }
367
368 // CHECK-LABEL: @test_vmulq_f32(
369 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
370 // CHECK: ret <4 x float> [[MUL_I]]
test_vmulq_f32(float32x4_t v1,float32x4_t v2)371 float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
372 return vmulq_f32(v1, v2);
373 }
374
375 // CHECK-LABEL: @test_vmulq_f64(
376 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
377 // CHECK: ret <2 x double> [[MUL_I]]
test_vmulq_f64(float64x2_t v1,float64x2_t v2)378 float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
379 return vmulq_f64(v1, v2);
380 }
381
382 // CHECK-LABEL: @test_vmul_p8(
383 // CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
384 // CHECK: ret <8 x i8> [[VMUL_V_I]]
test_vmul_p8(poly8x8_t v1,poly8x8_t v2)385 poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
386 return vmul_p8(v1, v2);
387 }
388
389 // CHECK-LABEL: @test_vmulq_p8(
390 // CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
391 // CHECK: ret <16 x i8> [[VMULQ_V_I]]
test_vmulq_p8(poly8x16_t v1,poly8x16_t v2)392 poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
393 return vmulq_p8(v1, v2);
394 }
395
396 // CHECK-LABEL: @test_vmla_s8(
397 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
398 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
399 // CHECK: ret <8 x i8> [[ADD_I]]
test_vmla_s8(int8x8_t v1,int8x8_t v2,int8x8_t v3)400 int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
401 return vmla_s8(v1, v2, v3);
402 }
403
404 // CHECK-LABEL: @test_vmla_s16(
405 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
406 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
407 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
408 // CHECK: ret <8 x i8> [[TMP0]]
test_vmla_s16(int16x4_t v1,int16x4_t v2,int16x4_t v3)409 int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
410 return (int8x8_t)vmla_s16(v1, v2, v3);
411 }
412
413 // CHECK-LABEL: @test_vmla_s32(
414 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
415 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
416 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_s32(int32x2_t v1,int32x2_t v2,int32x2_t v3)417 int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
418 return vmla_s32(v1, v2, v3);
419 }
420
421 // CHECK-LABEL: @test_vmla_f32(
422 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
423 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
424 // CHECK: ret <2 x float> [[ADD_I]]
test_vmla_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)425 float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
426 return vmla_f32(v1, v2, v3);
427 }
428
429 // CHECK-LABEL: @test_vmla_u8(
430 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
431 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
432 // CHECK: ret <8 x i8> [[ADD_I]]
test_vmla_u8(uint8x8_t v1,uint8x8_t v2,uint8x8_t v3)433 uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
434 return vmla_u8(v1, v2, v3);
435 }
436
437 // CHECK-LABEL: @test_vmla_u16(
438 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
439 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
440 // CHECK: ret <4 x i16> [[ADD_I]]
test_vmla_u16(uint16x4_t v1,uint16x4_t v2,uint16x4_t v3)441 uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
442 return vmla_u16(v1, v2, v3);
443 }
444
445 // CHECK-LABEL: @test_vmla_u32(
446 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
447 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
448 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)449 uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
450 return vmla_u32(v1, v2, v3);
451 }
452
453 // CHECK-LABEL: @test_vmlaq_s8(
454 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
455 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
456 // CHECK: ret <16 x i8> [[ADD_I]]
test_vmlaq_s8(int8x16_t v1,int8x16_t v2,int8x16_t v3)457 int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
458 return vmlaq_s8(v1, v2, v3);
459 }
460
461 // CHECK-LABEL: @test_vmlaq_s16(
462 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
463 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
464 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_s16(int16x8_t v1,int16x8_t v2,int16x8_t v3)465 int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
466 return vmlaq_s16(v1, v2, v3);
467 }
468
469 // CHECK-LABEL: @test_vmlaq_s32(
470 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
471 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
472 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_s32(int32x4_t v1,int32x4_t v2,int32x4_t v3)473 int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
474 return vmlaq_s32(v1, v2, v3);
475 }
476
477 // CHECK-LABEL: @test_vmlaq_f32(
478 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
479 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
480 // CHECK: ret <4 x float> [[ADD_I]]
test_vmlaq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)481 float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
482 return vmlaq_f32(v1, v2, v3);
483 }
484
485 // CHECK-LABEL: @test_vmlaq_u8(
486 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
487 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
488 // CHECK: ret <16 x i8> [[ADD_I]]
test_vmlaq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)489 uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
490 return vmlaq_u8(v1, v2, v3);
491 }
492
493 // CHECK-LABEL: @test_vmlaq_u16(
494 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
495 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
496 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)497 uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
498 return vmlaq_u16(v1, v2, v3);
499 }
500
501 // CHECK-LABEL: @test_vmlaq_u32(
502 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
503 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
504 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_u32(uint32x4_t v1,uint32x4_t v2,uint32x4_t v3)505 uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
506 return vmlaq_u32(v1, v2, v3);
507 }
508
509 // CHECK-LABEL: @test_vmlaq_f64(
510 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
511 // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
512 // CHECK: ret <2 x double> [[ADD_I]]
test_vmlaq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)513 float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
514 return vmlaq_f64(v1, v2, v3);
515 }
516
517 // CHECK-LABEL: @test_vmls_s8(
518 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
519 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
520 // CHECK: ret <8 x i8> [[SUB_I]]
test_vmls_s8(int8x8_t v1,int8x8_t v2,int8x8_t v3)521 int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
522 return vmls_s8(v1, v2, v3);
523 }
524
525 // CHECK-LABEL: @test_vmls_s16(
526 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
527 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
528 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
529 // CHECK: ret <8 x i8> [[TMP0]]
test_vmls_s16(int16x4_t v1,int16x4_t v2,int16x4_t v3)530 int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
531 return (int8x8_t)vmls_s16(v1, v2, v3);
532 }
533
534 // CHECK-LABEL: @test_vmls_s32(
535 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
536 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
537 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_s32(int32x2_t v1,int32x2_t v2,int32x2_t v3)538 int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
539 return vmls_s32(v1, v2, v3);
540 }
541
542 // CHECK-LABEL: @test_vmls_f32(
543 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
544 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
545 // CHECK: ret <2 x float> [[SUB_I]]
test_vmls_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)546 float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
547 return vmls_f32(v1, v2, v3);
548 }
549
550 // CHECK-LABEL: @test_vmls_u8(
551 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
552 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
553 // CHECK: ret <8 x i8> [[SUB_I]]
test_vmls_u8(uint8x8_t v1,uint8x8_t v2,uint8x8_t v3)554 uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
555 return vmls_u8(v1, v2, v3);
556 }
557
558 // CHECK-LABEL: @test_vmls_u16(
559 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
560 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
561 // CHECK: ret <4 x i16> [[SUB_I]]
test_vmls_u16(uint16x4_t v1,uint16x4_t v2,uint16x4_t v3)562 uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
563 return vmls_u16(v1, v2, v3);
564 }
565
566 // CHECK-LABEL: @test_vmls_u32(
567 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
568 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
569 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_u32(uint32x2_t v1,uint32x2_t v2,uint32x2_t v3)570 uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
571 return vmls_u32(v1, v2, v3);
572 }
573
574 // CHECK-LABEL: @test_vmlsq_s8(
575 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
576 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
577 // CHECK: ret <16 x i8> [[SUB_I]]
test_vmlsq_s8(int8x16_t v1,int8x16_t v2,int8x16_t v3)578 int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
579 return vmlsq_s8(v1, v2, v3);
580 }
581
582 // CHECK-LABEL: @test_vmlsq_s16(
583 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
584 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
585 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_s16(int16x8_t v1,int16x8_t v2,int16x8_t v3)586 int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
587 return vmlsq_s16(v1, v2, v3);
588 }
589
590 // CHECK-LABEL: @test_vmlsq_s32(
591 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
592 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
593 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_s32(int32x4_t v1,int32x4_t v2,int32x4_t v3)594 int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
595 return vmlsq_s32(v1, v2, v3);
596 }
597
598 // CHECK-LABEL: @test_vmlsq_f32(
599 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
600 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
601 // CHECK: ret <4 x float> [[SUB_I]]
test_vmlsq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)602 float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
603 return vmlsq_f32(v1, v2, v3);
604 }
605
606 // CHECK-LABEL: @test_vmlsq_u8(
607 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
608 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
609 // CHECK: ret <16 x i8> [[SUB_I]]
test_vmlsq_u8(uint8x16_t v1,uint8x16_t v2,uint8x16_t v3)610 uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
611 return vmlsq_u8(v1, v2, v3);
612 }
613
614 // CHECK-LABEL: @test_vmlsq_u16(
615 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
616 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
617 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_u16(uint16x8_t v1,uint16x8_t v2,uint16x8_t v3)618 uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
619 return vmlsq_u16(v1, v2, v3);
620 }
621
622 // CHECK-LABEL: @test_vmlsq_u32(
623 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
624 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
625 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_u32(uint32x4_t v1,uint32x4_t v2,uint32x4_t v3)626 uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
627 return vmlsq_u32(v1, v2, v3);
628 }
629
630 // CHECK-LABEL: @test_vmlsq_f64(
631 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
632 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
633 // CHECK: ret <2 x double> [[SUB_I]]
test_vmlsq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)634 float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
635 return vmlsq_f64(v1, v2, v3);
636 }
637
638 // CHECK-LABEL: @test_vfma_f32(
639 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
640 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
641 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
642 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
643 // CHECK: ret <2 x float> [[TMP3]]
test_vfma_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)644 float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
645 return vfma_f32(v1, v2, v3);
646 }
647
648 // CHECK-LABEL: @test_vfmaq_f32(
649 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
650 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
651 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
652 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
653 // CHECK: ret <4 x float> [[TMP3]]
test_vfmaq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)654 float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
655 return vfmaq_f32(v1, v2, v3);
656 }
657
658 // CHECK-LABEL: @test_vfmaq_f64(
659 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
660 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
661 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
662 // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
663 // CHECK: ret <2 x double> [[TMP3]]
test_vfmaq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)664 float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
665 return vfmaq_f64(v1, v2, v3);
666 }
667
668 // CHECK-LABEL: @test_vfms_f32(
669 // CHECK: [[SUB_I:%.*]] = fneg <2 x float> %v2
670 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
671 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
672 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
673 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
674 // CHECK: ret <2 x float> [[TMP3]]
test_vfms_f32(float32x2_t v1,float32x2_t v2,float32x2_t v3)675 float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
676 return vfms_f32(v1, v2, v3);
677 }
678
679 // CHECK-LABEL: @test_vfmsq_f32(
680 // CHECK: [[SUB_I:%.*]] = fneg <4 x float> %v2
681 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
682 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
683 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
684 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
685 // CHECK: ret <4 x float> [[TMP3]]
test_vfmsq_f32(float32x4_t v1,float32x4_t v2,float32x4_t v3)686 float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
687 return vfmsq_f32(v1, v2, v3);
688 }
689
690 // CHECK-LABEL: @test_vfmsq_f64(
691 // CHECK: [[SUB_I:%.*]] = fneg <2 x double> %v2
692 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
693 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
694 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
695 // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
696 // CHECK: ret <2 x double> [[TMP3]]
test_vfmsq_f64(float64x2_t v1,float64x2_t v2,float64x2_t v3)697 float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
698 return vfmsq_f64(v1, v2, v3);
699 }
700
701 // CHECK-LABEL: @test_vdivq_f64(
702 // CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
703 // CHECK: ret <2 x double> [[DIV_I]]
test_vdivq_f64(float64x2_t v1,float64x2_t v2)704 float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
705 return vdivq_f64(v1, v2);
706 }
707
708 // CHECK-LABEL: @test_vdivq_f32(
709 // CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
710 // CHECK: ret <4 x float> [[DIV_I]]
test_vdivq_f32(float32x4_t v1,float32x4_t v2)711 float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
712 return vdivq_f32(v1, v2);
713 }
714
715 // CHECK-LABEL: @test_vdiv_f32(
716 // CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
717 // CHECK: ret <2 x float> [[DIV_I]]
test_vdiv_f32(float32x2_t v1,float32x2_t v2)718 float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
719 return vdiv_f32(v1, v2);
720 }
721
// vaba/vabaq: absolute difference and accumulate, v1 + |v2 - v3|.
// Expected lowering is a sabd (signed) / uabd (unsigned) intrinsic call
// followed by a plain vector add; the extra bitcast [[TMP*]] lines are the
// generic <8 x i8>/<16 x i8> casts clang emits for non-i8 element types.
// CHECK-LABEL: @test_vaba_s8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}

// Q-form (128-bit) variants of the same accumulate pattern.
// CHECK-LABEL: @test_vabaq_s8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}
833
// vabd/vabdq: absolute difference, |v1 - v2|, lowered to a single call of the
// sabd (signed), uabd (unsigned) or fabd (floating-point) target intrinsic.
// CHECK-LABEL: @test_vabd_s8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}

// Q-form (128-bit) variants.
// CHECK-LABEL: @test_vabdq_s8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}
960
// vbsl/vbslq: bitwise select, (v1 & v2) | (~v1 & v3), expanded inline as
// and/xor/or instructions rather than an intrinsic call. The select mask v1
// is always an unsigned vector regardless of the data operand type; for
// float/double operands the data is bitcast to integer vectors first.
// CHECK-LABEL: @test_vbsl_s8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}

// NOTE: the (int8x8_t) cast is deliberate — it forces an extra bitcast of the
// <4 x i16> select result back to <8 x i8>, which the last CHECK line pins.
// CHECK-LABEL: @test_vbsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vbsl_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}

// Floating-point selects: data operands are bitcast to integer vectors,
// selected with and/xor/or, then bitcast back to the float type.
// CHECK-LABEL: @test_vbsl_f32(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]]
// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
// CHECK: ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}

// Q-form (128-bit) variants.
// CHECK-LABEL: @test_vbslq_s8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}
1186
// CHECK-LABEL: @test_vbslq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
// Fixed: this test previously took int32x4_t operands and called vbslq_s32,
// so the unsigned intrinsic named by the test (and by the CHECK-LABEL above)
// was never exercised. vbslq_u32 lowers to the identical <4 x i32>
// and/xor/or select sequence, so the CHECK lines are unchanged.
uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vbslq_u32(v1, v2, v3);
}
1199
// Remaining Q-form vbsl variants: bitwise select (v1 & v2) | (~v1 & v3)
// expanded inline; float/double data is bitcast through integer vectors.
// CHECK-LABEL: @test_vbslq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP4]]
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}
1267
// vrecps/vrsqrts: Newton-Raphson refinement-step intrinsics for reciprocal
// (frecps) and reciprocal square root (frsqrts) estimates.
// CHECK-LABEL: @test_vrecps_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
  return vrecps_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrecpsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VRECPSQ_V2_I]]
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}

// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]]
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}
1326
1327 // CHECK-LABEL: @test_vcage_f32(
1328 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1329 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1330 // CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1331 // CHECK: ret <2 x i32> [[VCAGE_V2_I]]
// Absolute compare |v1| >= |v2|: lowers to @llvm.aarch64.neon.facge (CHECK lines above).
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
  return vcage_f32(v1, v2);
}
1335
1336 // CHECK-LABEL: @test_vcage_f64(
1337 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1338 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1339 // CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1340 // CHECK: ret <1 x i64> [[VCAGE_V2_I]]
// Absolute compare |a| >= |b| on the 1-lane double vector: facge.v1i64.v1f64 (CHECK lines above).
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
  return vcage_f64(a, b);
}
1344
1345 // CHECK-LABEL: @test_vcageq_f32(
1346 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1347 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1348 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1349 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
// Quadword absolute compare >=: lowers to facge.v4i32.v4f32 (CHECK lines above).
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
  return vcageq_f32(v1, v2);
}
1353
1354 // CHECK-LABEL: @test_vcageq_f64(
1355 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1356 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1357 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1358 // CHECK: ret <2 x i64> [[VCAGEQ_V2_I]]
// Quadword absolute compare >= on doubles: facge.v2i64.v2f64 (CHECK lines above).
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
  return vcageq_f64(v1, v2);
}
1362
1363 // CHECK-LABEL: @test_vcagt_f32(
1364 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1365 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1366 // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1367 // CHECK: ret <2 x i32> [[VCAGT_V2_I]]
// Absolute compare |v1| > |v2|: lowers to @llvm.aarch64.neon.facgt (CHECK lines above).
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}
1371
1372 // CHECK-LABEL: @test_vcagt_f64(
1373 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1374 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1375 // CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1376 // CHECK: ret <1 x i64> [[VCAGT_V2_I]]
// Absolute compare |a| > |b| on the 1-lane double vector: facgt.v1i64.v1f64 (CHECK lines above).
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}
1380
1381 // CHECK-LABEL: @test_vcagtq_f32(
1382 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1383 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1384 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1385 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
// Quadword absolute compare >: lowers to facgt.v4i32.v4f32 (CHECK lines above).
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}
1389
1390 // CHECK-LABEL: @test_vcagtq_f64(
1391 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1392 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1393 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1394 // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
// Quadword absolute compare > on doubles: facgt.v2i64.v2f64 (CHECK lines above).
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}
1398
1399 // CHECK-LABEL: @test_vcale_f32(
1400 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1401 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1402 // CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1403 // CHECK: ret <2 x i32> [[VCALE_V2_I]]
// vcale (|v1| <= |v2|) is implemented as facge with the operands swapped —
// note (v2, v1) in the CHECK line above.
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}
1408
1409 // CHECK-LABEL: @test_vcale_f64(
1410 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1411 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1412 // CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1413 // CHECK: ret <1 x i64> [[VCALE_V2_I]]
// vcale on 1-lane doubles: facge with swapped operands, (b, a) in the CHECK line above.
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}
1417
1418 // CHECK-LABEL: @test_vcaleq_f32(
1419 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1420 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1421 // CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1422 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
// Quadword vcale: facge with swapped operands, (v2, v1) in the CHECK line above.
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}
1427
1428 // CHECK-LABEL: @test_vcaleq_f64(
1429 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1430 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1431 // CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1432 // CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
// Quadword vcale on doubles: facge with swapped operands, (v2, v1) in the CHECK line above.
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}
1437
1438 // CHECK-LABEL: @test_vcalt_f32(
1439 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1440 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1441 // CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1442 // CHECK: ret <2 x i32> [[VCALT_V2_I]]
// vcalt (|v1| < |v2|) is implemented as facgt with the operands swapped —
// note (v2, v1) in the CHECK line above.
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}
1447
1448 // CHECK-LABEL: @test_vcalt_f64(
1449 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1450 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1451 // CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1452 // CHECK: ret <1 x i64> [[VCALT_V2_I]]
// vcalt on 1-lane doubles: facgt with swapped operands, (b, a) in the CHECK line above.
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}
1456
1457 // CHECK-LABEL: @test_vcaltq_f32(
1458 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1459 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1460 // CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1461 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
// Quadword vcalt: facgt with swapped operands, (v2, v1) in the CHECK line above.
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}
1466
1467 // CHECK-LABEL: @test_vcaltq_f64(
1468 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1469 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1470 // CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1471 // CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
// Quadword vcalt on doubles: facgt with swapped operands, (v2, v1) in the CHECK line above.
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0 and v1 is possible, but would be odd.
}
1476
1477 // CHECK-LABEL: @test_vtst_s8(
1478 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1479 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1480 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1481 // CHECK: ret <8 x i8> [[VTST_I]]
// vtst: (v1 & v2) != 0 per lane, sign-extended to an all-ones/all-zeros mask (CHECK lines above).
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}
1485
1486 // CHECK-LABEL: @test_vtst_s16(
1487 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1488 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1489 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1490 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1491 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1492 // CHECK: ret <4 x i16> [[VTST_I]]
// vtst: and + icmp ne zero + sext mask on <4 x i16> lanes (CHECK lines above).
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}
1496
1497 // CHECK-LABEL: @test_vtst_s32(
1498 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1499 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1500 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1501 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1502 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1503 // CHECK: ret <2 x i32> [[VTST_I]]
// vtst: and + icmp ne zero + sext mask on <2 x i32> lanes (CHECK lines above).
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}
1507
1508 // CHECK-LABEL: @test_vtst_u8(
1509 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1510 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1511 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1512 // CHECK: ret <8 x i8> [[VTST_I]]
// Unsigned vtst uses the same and/icmp-ne/sext lowering as the signed form (CHECK lines above).
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}
1516
1517 // CHECK-LABEL: @test_vtst_u16(
1518 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1519 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1520 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1521 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1522 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1523 // CHECK: ret <4 x i16> [[VTST_I]]
// vtst: and + icmp ne zero + sext mask on <4 x i16> lanes (CHECK lines above).
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}
1527
1528 // CHECK-LABEL: @test_vtst_u32(
1529 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1530 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1531 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1532 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1533 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1534 // CHECK: ret <2 x i32> [[VTST_I]]
// vtst: and + icmp ne zero + sext mask on <2 x i32> lanes (CHECK lines above).
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}
1538
1539 // CHECK-LABEL: @test_vtstq_s8(
1540 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1541 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1542 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1543 // CHECK: ret <16 x i8> [[VTST_I]]
// Quadword vtst: and + icmp ne zero + sext mask on <16 x i8> lanes (CHECK lines above).
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}
1547
1548 // CHECK-LABEL: @test_vtstq_s16(
1549 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1550 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1551 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1552 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1553 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1554 // CHECK: ret <8 x i16> [[VTST_I]]
// Quadword vtst: and + icmp ne zero + sext mask on <8 x i16> lanes (CHECK lines above).
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}
1558
1559 // CHECK-LABEL: @test_vtstq_s32(
1560 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1561 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1562 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1563 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1564 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1565 // CHECK: ret <4 x i32> [[VTST_I]]
// Quadword vtst: and + icmp ne zero + sext mask on <4 x i32> lanes (CHECK lines above).
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}
1569
1570 // CHECK-LABEL: @test_vtstq_u8(
1571 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1572 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1573 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1574 // CHECK: ret <16 x i8> [[VTST_I]]
// Quadword unsigned vtst: same and/icmp-ne/sext lowering as the signed form (CHECK lines above).
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}
1578
1579 // CHECK-LABEL: @test_vtstq_u16(
1580 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1581 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1582 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1583 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1584 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1585 // CHECK: ret <8 x i16> [[VTST_I]]
// Quadword vtst: and + icmp ne zero + sext mask on <8 x i16> lanes (CHECK lines above).
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}
1589
1590 // CHECK-LABEL: @test_vtstq_u32(
1591 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1592 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1593 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1594 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1595 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1596 // CHECK: ret <4 x i32> [[VTST_I]]
// Quadword vtst: and + icmp ne zero + sext mask on <4 x i32> lanes (CHECK lines above).
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}
1600
1601 // CHECK-LABEL: @test_vtstq_s64(
1602 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1603 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1604 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1605 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1606 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1607 // CHECK: ret <2 x i64> [[VTST_I]]
// Quadword 64-bit vtst: and + icmp ne zero + sext mask on <2 x i64> lanes (CHECK lines above).
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}
1611
1612 // CHECK-LABEL: @test_vtstq_u64(
1613 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1614 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1615 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1616 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1617 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1618 // CHECK: ret <2 x i64> [[VTST_I]]
// Quadword 64-bit vtst: and + icmp ne zero + sext mask on <2 x i64> lanes (CHECK lines above).
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vtstq_u64(v1, v2);
}
1622
1623 // CHECK-LABEL: @test_vtst_p8(
1624 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1625 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1626 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1627 // CHECK: ret <8 x i8> [[VTST_I]]
// Polynomial vtst: same and/icmp-ne/sext lowering as the integer forms (CHECK lines above).
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
  return vtst_p8(v1, v2);
}
1631
1632 // CHECK-LABEL: @test_vtst_p16(
1633 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1634 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1635 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1636 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1637 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1638 // CHECK: ret <4 x i16> [[VTST_I]]
// Polynomial vtst: and + icmp ne zero + sext mask on <4 x i16> lanes (CHECK lines above).
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
  return vtst_p16(v1, v2);
}
1642
1643 // CHECK-LABEL: @test_vtstq_p8(
1644 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1645 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1646 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1647 // CHECK: ret <16 x i8> [[VTST_I]]
// Quadword polynomial vtst: and + icmp ne zero + sext mask on <16 x i8> lanes (CHECK lines above).
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vtstq_p8(v1, v2);
}
1651
1652 // CHECK-LABEL: @test_vtstq_p16(
1653 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1654 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1655 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1656 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1657 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1658 // CHECK: ret <8 x i16> [[VTST_I]]
// Quadword polynomial vtst: and + icmp ne zero + sext mask on <8 x i16> lanes (CHECK lines above).
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
  return vtstq_p16(v1, v2);
}
1662
1663 // CHECK-LABEL: @test_vtst_s64(
1664 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1665 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1666 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
1667 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1668 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1669 // CHECK: ret <1 x i64> [[VTST_I]]
// 1-lane 64-bit vtst: and + icmp ne zero + sext mask on <1 x i64> (CHECK lines above).
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}
1673
1674 // CHECK-LABEL: @test_vtst_u64(
1675 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1676 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1677 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
1678 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1679 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1680 // CHECK: ret <1 x i64> [[VTST_I]]
// 1-lane 64-bit vtst: and + icmp ne zero + sext mask on <1 x i64> (CHECK lines above).
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}
1684
1685 // CHECK-LABEL: @test_vceq_s8(
1686 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1687 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1688 // CHECK: ret <8 x i8> [[SEXT_I]]
// vceq: lane-wise icmp eq sign-extended to an all-ones/all-zeros mask (CHECK lines above).
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}
1692
1693 // CHECK-LABEL: @test_vceq_s16(
1694 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1695 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1696 // CHECK: ret <4 x i16> [[SEXT_I]]
// vceq: icmp eq + sext mask on <4 x i16> lanes (CHECK lines above).
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}
1700
1701 // CHECK-LABEL: @test_vceq_s32(
1702 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1703 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1704 // CHECK: ret <2 x i32> [[SEXT_I]]
// vceq: icmp eq + sext mask on <2 x i32> lanes (CHECK lines above).
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}
1708
1709 // CHECK-LABEL: @test_vceq_s64(
1710 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1711 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1712 // CHECK: ret <1 x i64> [[SEXT_I]]
// 1-lane 64-bit vceq: icmp eq + sext mask on <1 x i64> (CHECK lines above).
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}
1716
1717 // CHECK-LABEL: @test_vceq_u64(
1718 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1719 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1720 // CHECK: ret <1 x i64> [[SEXT_I]]
// Equality is sign-agnostic: unsigned vceq uses the same icmp eq lowering (CHECK lines above).
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}
1724
1725 // CHECK-LABEL: @test_vceq_f32(
1726 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
1727 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1728 // CHECK: ret <2 x i32> [[SEXT_I]]
// Float vceq: ordered fcmp oeq + sext to an i32 lane mask (CHECK lines above).
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}
1732
1733 // CHECK-LABEL: @test_vceq_f64(
1734 // CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
1735 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1736 // CHECK: ret <1 x i64> [[SEXT_I]]
// 1-lane double vceq: fcmp oeq + sext to an i64 mask (CHECK lines above).
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}
1740
1741 // CHECK-LABEL: @test_vceq_u8(
1742 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1743 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1744 // CHECK: ret <8 x i8> [[SEXT_I]]
// vceq: icmp eq + sext mask on <8 x i8> lanes (CHECK lines above).
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}
1748
1749 // CHECK-LABEL: @test_vceq_u16(
1750 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1751 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1752 // CHECK: ret <4 x i16> [[SEXT_I]]
// vceq: icmp eq + sext mask on <4 x i16> lanes (CHECK lines above).
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}
1756
1757 // CHECK-LABEL: @test_vceq_u32(
1758 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1759 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1760 // CHECK: ret <2 x i32> [[SEXT_I]]
// vceq: icmp eq + sext mask on <2 x i32> lanes (CHECK lines above).
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}
1764
1765 // CHECK-LABEL: @test_vceq_p8(
1766 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1767 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1768 // CHECK: ret <8 x i8> [[SEXT_I]]
// Polynomial vceq: same icmp eq + sext lowering as the integer forms (CHECK lines above).
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}
1772
1773 // CHECK-LABEL: @test_vceqq_s8(
1774 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1775 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1776 // CHECK: ret <16 x i8> [[SEXT_I]]
// Quadword vceq: icmp eq + sext mask on <16 x i8> lanes (CHECK lines above).
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}
1780
1781 // CHECK-LABEL: @test_vceqq_s16(
1782 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1783 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1784 // CHECK: ret <8 x i16> [[SEXT_I]]
// Quadword vceq: icmp eq + sext mask on <8 x i16> lanes (CHECK lines above).
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}
1788
1789 // CHECK-LABEL: @test_vceqq_s32(
1790 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1791 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1792 // CHECK: ret <4 x i32> [[SEXT_I]]
// Quadword vceq: icmp eq + sext mask on <4 x i32> lanes (CHECK lines above).
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}
1796
1797 // CHECK-LABEL: @test_vceqq_f32(
1798 // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
1799 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1800 // CHECK: ret <4 x i32> [[SEXT_I]]
// Quadword float vceq: fcmp oeq + sext to an i32 lane mask (CHECK lines above).
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}
1804
1805 // CHECK-LABEL: @test_vceqq_u8(
1806 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1807 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1808 // CHECK: ret <16 x i8> [[SEXT_I]]
// Quadword vceq: icmp eq + sext mask on <16 x i8> lanes (CHECK lines above).
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}
1812
1813 // CHECK-LABEL: @test_vceqq_u16(
1814 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1815 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1816 // CHECK: ret <8 x i16> [[SEXT_I]]
// Quadword vceq: icmp eq + sext mask on <8 x i16> lanes (CHECK lines above).
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vceqq_u16(v1, v2);
}
1820
1821 // CHECK-LABEL: @test_vceqq_u32(
1822 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1823 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1824 // CHECK: ret <4 x i32> [[SEXT_I]]
// Quadword vceq: icmp eq + sext mask on <4 x i32> lanes (CHECK lines above).
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vceqq_u32(v1, v2);
}
1828
1829 // CHECK-LABEL: @test_vceqq_p8(
1830 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1831 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1832 // CHECK: ret <16 x i8> [[SEXT_I]]
// Quadword polynomial vceq: icmp eq + sext mask on <16 x i8> lanes (CHECK lines above).
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vceqq_p8(v1, v2);
}
1836
1837 // CHECK-LABEL: @test_vceqq_s64(
1838 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1839 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1840 // CHECK: ret <2 x i64> [[SEXT_I]]
// Quadword 64-bit vceq: icmp eq + sext mask on <2 x i64> lanes (CHECK lines above).
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
  return vceqq_s64(v1, v2);
}
1844
1845 // CHECK-LABEL: @test_vceqq_u64(
1846 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1847 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1848 // CHECK: ret <2 x i64> [[SEXT_I]]
// Quadword 64-bit vceq: icmp eq + sext mask on <2 x i64> lanes (CHECK lines above).
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vceqq_u64(v1, v2);
}
1852
1853 // CHECK-LABEL: @test_vceqq_f64(
1854 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
1855 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1856 // CHECK: ret <2 x i64> [[SEXT_I]]
// Quadword double vceq: fcmp oeq + sext to an i64 lane mask (CHECK lines above).
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
  return vceqq_f64(v1, v2);
}
1860
1861 // CHECK-LABEL: @test_vcge_s8(
1862 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
1863 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1864 // CHECK: ret <8 x i8> [[SEXT_I]]
// vcge: signed icmp sge + sext to an all-ones/all-zeros mask (CHECK lines above).
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v1, v2);
}
1868
1869 // CHECK-LABEL: @test_vcge_s16(
1870 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
1871 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1872 // CHECK: ret <4 x i16> [[SEXT_I]]
// vcge: icmp sge + sext mask on <4 x i16> lanes (CHECK lines above).
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
  return vcge_s16(v1, v2);
}
1876
1877 // CHECK-LABEL: @test_vcge_s32(
1878 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
1879 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1880 // CHECK: ret <2 x i32> [[SEXT_I]]
// vcge: icmp sge + sext mask on <2 x i32> lanes (CHECK lines above).
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
  return vcge_s32(v1, v2);
}
1884
1885 // CHECK-LABEL: @test_vcge_s64(
1886 // CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
1887 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1888 // CHECK: ret <1 x i64> [[SEXT_I]]
// 1-lane 64-bit vcge: icmp sge + sext mask on <1 x i64> (CHECK lines above).
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
  return vcge_s64(a, b);
}
1892
1893 // CHECK-LABEL: @test_vcge_u64(
1894 // CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
1895 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1896 // CHECK: ret <1 x i64> [[SEXT_I]]
// Unsigned 1-lane vcge: icmp uge (not sge) + sext mask (CHECK lines above).
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
  return vcge_u64(a, b);
}
1900
1901 // CHECK-LABEL: @test_vcge_f32(
1902 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
1903 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1904 // CHECK: ret <2 x i32> [[SEXT_I]]
// Float vcge: ordered fcmp oge + sext to an i32 lane mask (CHECK lines above).
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
  return vcge_f32(v1, v2);
}
1908
1909 // CHECK-LABEL: @test_vcge_f64(
1910 // CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
1911 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1912 // CHECK: ret <1 x i64> [[SEXT_I]]
// 1-lane double vcge: fcmp oge + sext to an i64 mask (CHECK lines above).
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
  return vcge_f64(a, b);
}
1916
1917 // CHECK-LABEL: @test_vcge_u8(
1918 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
1919 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1920 // CHECK: ret <8 x i8> [[SEXT_I]]
// Unsigned vcge: icmp uge + sext mask on <8 x i8> lanes (CHECK lines above).
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcge_u8(v1, v2);
}
1924
1925 // CHECK-LABEL: @test_vcge_u16(
1926 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
1927 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1928 // CHECK: ret <4 x i16> [[SEXT_I]]
// Unsigned vcge: icmp uge + sext mask on <4 x i16> lanes (CHECK lines above).
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcge_u16(v1, v2);
}
1932
1933 // CHECK-LABEL: @test_vcge_u32(
1934 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
1935 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1936 // CHECK: ret <2 x i32> [[SEXT_I]]
// Unsigned vcge: icmp uge + sext mask on <2 x i32> lanes (CHECK lines above).
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcge_u32(v1, v2);
}
1940
1941 // CHECK-LABEL: @test_vcgeq_s8(
1942 // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
1943 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1944 // CHECK: ret <16 x i8> [[SEXT_I]]
// Quadword vcge: icmp sge + sext mask on <16 x i8> lanes (CHECK lines above).
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgeq_s8(v1, v2);
}
1948
1949 // CHECK-LABEL: @test_vcgeq_s16(
1950 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
1951 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1952 // CHECK: ret <8 x i16> [[SEXT_I]]
// Quadword vcge: icmp sge + sext mask on <8 x i16> lanes (CHECK lines above).
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgeq_s16(v1, v2);
}
1956
1957 // CHECK-LABEL: @test_vcgeq_s32(
1958 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
1959 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1960 // CHECK: ret <4 x i32> [[SEXT_I]]
// Quadword vcge: icmp sge + sext mask on <4 x i32> lanes (CHECK lines above).
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgeq_s32(v1, v2);
}
1964
1965 // CHECK-LABEL: @test_vcgeq_f32(
1966 // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
1967 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1968 // CHECK: ret <4 x i32> [[SEXT_I]]
// Quadword float vcge: fcmp oge + sext to an i32 lane mask (CHECK lines above).
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgeq_f32(v1, v2);
}
1972
1973 // CHECK-LABEL: @test_vcgeq_u8(
1974 // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
1975 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1976 // CHECK: ret <16 x i8> [[SEXT_I]]
// Quadword unsigned vcge: icmp uge + sext mask on <16 x i8> lanes (CHECK lines above).
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgeq_u8(v1, v2);
}
1980
1981 // CHECK-LABEL: @test_vcgeq_u16(
1982 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
1983 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1984 // CHECK: ret <8 x i16> [[SEXT_I]]
// Quadword unsigned vcge: icmp uge + sext mask on <8 x i16> lanes (CHECK lines above).
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgeq_u16(v1, v2);
}
1988
1989 // CHECK-LABEL: @test_vcgeq_u32(
1990 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
1991 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1992 // CHECK: ret <4 x i32> [[SEXT_I]]
// Quadword unsigned vcge: icmp uge + sext mask on <4 x i32> lanes (CHECK lines above).
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgeq_u32(v1, v2);
}
1996
1997 // CHECK-LABEL: @test_vcgeq_s64(
1998 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
1999 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2000 // CHECK: ret <2 x i64> [[SEXT_I]]
// Quadword 64-bit vcge: icmp sge + sext mask on <2 x i64> lanes (CHECK lines above).
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgeq_s64(v1, v2);
}
2004
2005 // CHECK-LABEL: @test_vcgeq_u64(
2006 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
2007 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2008 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vcgeq_u64(uint64x2_t v1,uint64x2_t v2)2009 uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
2010 return vcgeq_u64(v1, v2);
2011 }
2012
2013 // CHECK-LABEL: @test_vcgeq_f64(
2014 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
2015 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2016 // CHECK: ret <2 x i64> [[SEXT_I]]
test_vcgeq_f64(float64x2_t v1,float64x2_t v2)2017 uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
2018 return vcgeq_f64(v1, v2);
2019 }
2020
// CHECK-LABEL: @test_vcle_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vcle:
// At the IR level, vcle_* is a plain (s|u)le / ole compare plus sext; the
// backend may still select it as GE with reversed operands, but that is no
// longer what this test checks (it used to check assembly).
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: @test_vcle_s64(
// CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: @test_vcle_u64(
// CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: @test_vcle_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}
2103
// vcleq_*: 128-bit vector compare <=; same compare+sext pattern as vcle_*.
// CHECK-LABEL: @test_vcleq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcleq_s64(
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_u64(
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcleq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}
2183
// vcgt_*: 64-bit vector compare >; compare+sext pattern as above.
// CHECK-LABEL: @test_vcgt_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgt_s64(
// CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}

// CHECK-LABEL: @test_vcgt_u64(
// CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}

// CHECK-LABEL: @test_vcgt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgt_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}

// CHECK-LABEL: @test_vcgt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}
2263
// vcgtq_*: 128-bit vector compare >; compare+sext pattern as above.
// CHECK-LABEL: @test_vcgtq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_s64(
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_u64(
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcgtq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}
2343
// CHECK-LABEL: @test_vclt_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// At the IR level, vclt_* is a plain (s|u)lt / olt compare plus sext; the
// backend may still select it as GT with reversed operands, but that is no
// longer what this test checks (it used to check assembly).
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: @test_vclt_s64(
// CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: @test_vclt_u64(
// CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: @test_vclt_f64(
// CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}
2426
// vcltq_*: 128-bit vector compare <; compare+sext pattern as above.
// CHECK-LABEL: @test_vcltq_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: @test_vcltq_s64(
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_u64(
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: @test_vcltq_f64(
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}
2506
// vhadd_*: 64-bit halving add, lowered to the aarch64.neon.(s|u)hadd
// intrinsics. For the 16/32-bit element variants the frontend also emits
// bitcasts to/from <8 x i8> around the call; they are dead after mem2reg.
// CHECK-LABEL: @test_vhadd_s8(
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}
2560
// vhaddq_*: 128-bit halving add; same (s|u)hadd intrinsic lowering as vhadd_*.
// CHECK-LABEL: @test_vhaddq_s8(
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}
2614
// vhsub_*: 64-bit halving subtract, lowered to the aarch64.neon.(s|u)hsub
// intrinsics; same bitcast pattern around the call as the vhadd_* tests.
// CHECK-LABEL: @test_vhsub_s8(
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}
2668
// vhsubq_*: 128-bit halving subtract; same (s|u)hsub lowering as vhsub_*.
// CHECK-LABEL: @test_vhsubq_s8(
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}
2722
// 64-bit rounding halving add: vrhadd_* must lower to
// llvm.aarch64.neon.{s,u}rhadd.* (SRHADD/URHADD, D-register forms).
// CHECK-LABEL: @test_vrhadd_s8(
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u8(
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vrhadd_u32(v1, v2);
}
2776
// 128-bit rounding halving add: vrhaddq_* must lower to
// llvm.aarch64.neon.{s,u}rhadd.* (SRHADD/URHADD, Q-register forms).
// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}
2830
// 64-bit saturating add: vqadd_* must lower to
// llvm.aarch64.neon.{s,u}qadd.* (SQADD/UQADD, D-register forms).
// CHECK-LABEL: @test_vqadd_s8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}
2904
// 128-bit saturating add: vqaddq_* must lower to
// llvm.aarch64.neon.{s,u}qadd.* (SQADD/UQADD, Q-register forms).
// CHECK-LABEL: @test_vqaddq_s8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
2978
// 64-bit saturating subtract: vqsub_* must lower to
// llvm.aarch64.neon.{s,u}qsub.* (SQSUB/UQSUB, D-register forms).
// CHECK-LABEL: @test_vqsub_s8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}
3052
// 128-bit saturating subtract: vqsubq_* must lower to
// llvm.aarch64.neon.{s,u}qsub.* (SQSUB/UQSUB, Q-register forms).
// CHECK-LABEL: @test_vqsubq_s8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}
3126
// 64-bit register shift: vshl_* must lower to llvm.aarch64.neon.{s,u}shl.*
// (SSHL/USHL). Note the unsigned variants still take a *signed* shift
// vector (b), as the per-lane shift amount may be negative (right shift).
// CHECK-LABEL: @test_vshl_s8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}
3200
// 128-bit register shift: vshlq_* must lower to llvm.aarch64.neon.{s,u}shl.*
// (SSHL/USHL, Q-register forms). As with the D-register variants, the shift
// vector (b) is signed even for the unsigned intrinsics.
// CHECK-LABEL: @test_vshlq_s8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
3274
3275 // CHECK-LABEL: @test_vqshl_s8(
3276 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3277 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
test_vqshl_s8(int8x8_t a,int8x8_t b)3278 int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
3279 return vqshl_s8(a, b);
3280 }
3281
3282 // CHECK-LABEL: @test_vqshl_s16(
3283 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3284 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3285 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3286 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3287 // CHECK: ret <4 x i16> [[VQSHL_V2_I]]
test_vqshl_s16(int16x4_t a,int16x4_t b)3288 int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
3289 return vqshl_s16(a, b);
3290 }
3291
3292 // CHECK-LABEL: @test_vqshl_s32(
3293 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3295 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3296 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3297 // CHECK: ret <2 x i32> [[VQSHL_V2_I]]
// Verifies vqshl_s32 lowers to @llvm.aarch64.neon.sqshl.v2i32 (see CHECK lines above).
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}
3301
3302 // CHECK-LABEL: @test_vqshl_s64(
3303 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3304 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3305 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3306 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3307 // CHECK: ret <1 x i64> [[VQSHL_V2_I]]
// Verifies vqshl_s64 lowers to @llvm.aarch64.neon.sqshl.v1i64 (see CHECK lines above).
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}
3311
3312 // CHECK-LABEL: @test_vqshl_u8(
3313 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3314 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
// Verifies vqshl_u8 lowers to @llvm.aarch64.neon.uqshl.v8i8; shift-count operand stays signed (see CHECK lines above).
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}
3318
3319 // CHECK-LABEL: @test_vqshl_u16(
3320 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3321 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3322 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3323 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3324 // CHECK: ret <4 x i16> [[VQSHL_V2_I]]
// Verifies vqshl_u16 lowers to @llvm.aarch64.neon.uqshl.v4i16 (see CHECK lines above).
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}
3328
3329 // CHECK-LABEL: @test_vqshl_u32(
3330 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3331 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3332 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3333 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3334 // CHECK: ret <2 x i32> [[VQSHL_V2_I]]
// Verifies vqshl_u32 lowers to @llvm.aarch64.neon.uqshl.v2i32 (see CHECK lines above).
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}
3338
3339 // CHECK-LABEL: @test_vqshl_u64(
3340 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3341 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3342 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3343 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3344 // CHECK: ret <1 x i64> [[VQSHL_V2_I]]
// Verifies vqshl_u64 lowers to @llvm.aarch64.neon.uqshl.v1i64 (see CHECK lines above).
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}
3348
3349 // CHECK-LABEL: @test_vqshlq_s8(
3350 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3351 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
// Verifies vqshlq_s8 (128-bit q-form) lowers to @llvm.aarch64.neon.sqshl.v16i8 (see CHECK lines above).
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}
3355
3356 // CHECK-LABEL: @test_vqshlq_s16(
3357 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3358 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3359 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3360 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
3361 // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
// Verifies vqshlq_s16 lowers to @llvm.aarch64.neon.sqshl.v8i16 (see CHECK lines above).
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}
3365
3366 // CHECK-LABEL: @test_vqshlq_s32(
3367 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3368 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3369 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3370 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
3371 // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
// Verifies vqshlq_s32 lowers to @llvm.aarch64.neon.sqshl.v4i32 (see CHECK lines above).
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}
3375
3376 // CHECK-LABEL: @test_vqshlq_s64(
3377 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3378 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3379 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3380 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
3381 // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
// Verifies vqshlq_s64 lowers to @llvm.aarch64.neon.sqshl.v2i64 (see CHECK lines above).
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}
3385
3386 // CHECK-LABEL: @test_vqshlq_u8(
3387 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3388 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
// Verifies vqshlq_u8 lowers to @llvm.aarch64.neon.uqshl.v16i8 (see CHECK lines above).
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}
3392
3393 // CHECK-LABEL: @test_vqshlq_u16(
3394 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3395 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3396 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3397 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
3398 // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
// Verifies vqshlq_u16 lowers to @llvm.aarch64.neon.uqshl.v8i16 (see CHECK lines above).
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}
3402
3403 // CHECK-LABEL: @test_vqshlq_u32(
3404 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3405 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3406 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3407 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
3408 // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
// Verifies vqshlq_u32 lowers to @llvm.aarch64.neon.uqshl.v4i32 (see CHECK lines above).
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}
3412
3413 // CHECK-LABEL: @test_vqshlq_u64(
3414 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3415 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3416 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3417 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
3418 // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
// Verifies vqshlq_u64 lowers to @llvm.aarch64.neon.uqshl.v2i64 (see CHECK lines above).
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
3422
3423 // CHECK-LABEL: @test_vrshl_s8(
3424 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3425 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
// Verifies vrshl_s8 (rounding shift) lowers to @llvm.aarch64.neon.srshl.v8i8 (see CHECK lines above).
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}
3429
3430 // CHECK-LABEL: @test_vrshl_s16(
3431 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3432 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3433 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3434 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3435 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
// Verifies vrshl_s16 lowers to @llvm.aarch64.neon.srshl.v4i16 (see CHECK lines above).
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}
3439
3440 // CHECK-LABEL: @test_vrshl_s32(
3441 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3442 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3443 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3444 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3445 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
// Verifies vrshl_s32 lowers to @llvm.aarch64.neon.srshl.v2i32 (see CHECK lines above).
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}
3449
3450 // CHECK-LABEL: @test_vrshl_s64(
3451 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3452 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3453 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3454 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3455 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
// Verifies vrshl_s64 lowers to @llvm.aarch64.neon.srshl.v1i64 (see CHECK lines above).
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}
3459
3460 // CHECK-LABEL: @test_vrshl_u8(
3461 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3462 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
// Verifies vrshl_u8 lowers to @llvm.aarch64.neon.urshl.v8i8; shift-count operand stays signed (see CHECK lines above).
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}
3466
3467 // CHECK-LABEL: @test_vrshl_u16(
3468 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3469 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3470 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3471 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3472 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
// Verifies vrshl_u16 lowers to @llvm.aarch64.neon.urshl.v4i16 (see CHECK lines above).
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}
3476
3477 // CHECK-LABEL: @test_vrshl_u32(
3478 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3479 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3480 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3481 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3482 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
// Verifies vrshl_u32 lowers to @llvm.aarch64.neon.urshl.v2i32 (see CHECK lines above).
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}
3486
3487 // CHECK-LABEL: @test_vrshl_u64(
3488 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3489 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3490 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3491 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3492 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
// Verifies vrshl_u64 lowers to @llvm.aarch64.neon.urshl.v1i64 (see CHECK lines above).
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}
3496
3497 // CHECK-LABEL: @test_vrshlq_s8(
3498 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3499 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
// Verifies vrshlq_s8 lowers to @llvm.aarch64.neon.srshl.v16i8 (see CHECK lines above).
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}
3503
3504 // CHECK-LABEL: @test_vrshlq_s16(
3505 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3506 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3507 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3508 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3509 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
// Verifies vrshlq_s16 lowers to @llvm.aarch64.neon.srshl.v8i16 (see CHECK lines above).
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}
3513
3514 // CHECK-LABEL: @test_vrshlq_s32(
3515 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3516 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3517 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3518 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3519 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
// Verifies vrshlq_s32 lowers to @llvm.aarch64.neon.srshl.v4i32 (see CHECK lines above).
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}
3523
3524 // CHECK-LABEL: @test_vrshlq_s64(
3525 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3526 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3527 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3528 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3529 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
// Verifies vrshlq_s64 lowers to @llvm.aarch64.neon.srshl.v2i64 (see CHECK lines above).
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}
3533
3534 // CHECK-LABEL: @test_vrshlq_u8(
3535 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3536 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
// Verifies vrshlq_u8 lowers to @llvm.aarch64.neon.urshl.v16i8 (see CHECK lines above).
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}
3540
3541 // CHECK-LABEL: @test_vrshlq_u16(
3542 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3543 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3544 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3545 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3546 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
// Verifies vrshlq_u16 lowers to @llvm.aarch64.neon.urshl.v8i16 (see CHECK lines above).
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}
3550
3551 // CHECK-LABEL: @test_vrshlq_u32(
3552 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3553 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3554 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3555 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3556 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
// Verifies vrshlq_u32 lowers to @llvm.aarch64.neon.urshl.v4i32 (see CHECK lines above).
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}
3560
3561 // CHECK-LABEL: @test_vrshlq_u64(
3562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3564 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3565 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3566 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
// Verifies vrshlq_u64 lowers to @llvm.aarch64.neon.urshl.v2i64 (see CHECK lines above).
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}
3570
3571 // CHECK-LABEL: @test_vqrshl_s8(
3572 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3573 // CHECK: ret <8 x i8> [[VQRSHL_V_I]]
// Verifies vqrshl_s8 (saturating rounding shift) lowers to @llvm.aarch64.neon.sqrshl.v8i8 (see CHECK lines above).
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}
3577
3578 // CHECK-LABEL: @test_vqrshl_s16(
3579 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3580 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3581 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3582 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
3583 // CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
// Verifies vqrshl_s16 lowers to @llvm.aarch64.neon.sqrshl.v4i16 (see CHECK lines above).
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}
3587
3588 // CHECK-LABEL: @test_vqrshl_s32(
3589 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3590 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3591 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3592 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
3593 // CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
// Verifies vqrshl_s32 lowers to @llvm.aarch64.neon.sqrshl.v2i32 (see CHECK lines above).
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}
3597
3598 // CHECK-LABEL: @test_vqrshl_s64(
3599 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3600 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3601 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3602 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
3603 // CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
// Verifies vqrshl_s64 lowers to @llvm.aarch64.neon.sqrshl.v1i64 (see CHECK lines above).
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}
3607
3608 // CHECK-LABEL: @test_vqrshl_u8(
3609 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3610 // CHECK: ret <8 x i8> [[VQRSHL_V_I]]
// Verifies vqrshl_u8 lowers to @llvm.aarch64.neon.uqrshl.v8i8; shift-count operand stays signed (see CHECK lines above).
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}
3614
3615 // CHECK-LABEL: @test_vqrshl_u16(
3616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3617 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3618 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3619 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
3620 // CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
// Verifies vqrshl_u16 lowers to @llvm.aarch64.neon.uqrshl.v4i16 (see CHECK lines above).
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}
3624
3625 // CHECK-LABEL: @test_vqrshl_u32(
3626 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3627 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3628 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3629 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
3630 // CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
// Verifies vqrshl_u32 lowers to @llvm.aarch64.neon.uqrshl.v2i32 (see CHECK lines above).
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}
3634
3635 // CHECK-LABEL: @test_vqrshl_u64(
3636 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3637 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3638 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3639 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
3640 // CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
// Verifies vqrshl_u64 lowers to @llvm.aarch64.neon.uqrshl.v1i64 (see CHECK lines above).
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}
3644
3645 // CHECK-LABEL: @test_vqrshlq_s8(
3646 // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3647 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
// Verifies vqrshlq_s8 lowers to @llvm.aarch64.neon.sqrshl.v16i8 (see CHECK lines above).
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}
3651
3652 // CHECK-LABEL: @test_vqrshlq_s16(
3653 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3654 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3655 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3656 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
3657 // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
// Verifies vqrshlq_s16 lowers to @llvm.aarch64.neon.sqrshl.v8i16 (see CHECK lines above).
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}
3661
3662 // CHECK-LABEL: @test_vqrshlq_s32(
3663 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3664 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3665 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3666 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
3667 // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
// Verifies vqrshlq_s32 lowers to @llvm.aarch64.neon.sqrshl.v4i32 (see CHECK lines above).
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}
3671
3672 // CHECK-LABEL: @test_vqrshlq_s64(
3673 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3674 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3675 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3676 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
3677 // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
// Verifies vqrshlq_s64 lowers to @llvm.aarch64.neon.sqrshl.v2i64 (see CHECK lines above).
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}
3681
3682 // CHECK-LABEL: @test_vqrshlq_u8(
3683 // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3684 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
// Verifies vqrshlq_u8 lowers to @llvm.aarch64.neon.uqrshl.v16i8 (see CHECK lines above).
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}
3688
3689 // CHECK-LABEL: @test_vqrshlq_u16(
3690 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3691 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3692 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3693 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
3694 // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
// Verifies vqrshlq_u16 lowers to @llvm.aarch64.neon.uqrshl.v8i16 (see CHECK lines above).
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}
3698
3699 // CHECK-LABEL: @test_vqrshlq_u32(
3700 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3701 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3702 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3703 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
3704 // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
// Verifies vqrshlq_u32 lowers to @llvm.aarch64.neon.uqrshl.v4i32 (see CHECK lines above).
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}
3708
3709 // CHECK-LABEL: @test_vqrshlq_u64(
3710 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3711 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3712 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3713 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
3714 // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
// Verifies vqrshlq_u64 lowers to @llvm.aarch64.neon.uqrshl.v2i64 (see CHECK lines above).
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
3718
3719 // CHECK-LABEL: @test_vsli_n_p64(
3720 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3721 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3722 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3723 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3724 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
3725 // CHECK: ret <1 x i64> [[VSLI_N2]]
// Verifies vsli_n_p64 (shift-left-and-insert, immediate 0) lowers to @llvm.aarch64.neon.vsli.v1i64 (see CHECK lines above).
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}
3729
3730 // CHECK-LABEL: @test_vsliq_n_p64(
3731 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3732 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3733 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3734 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3735 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
3736 // CHECK: ret <2 x i64> [[VSLI_N2]]
// Verifies vsliq_n_p64 (immediate 0) lowers to @llvm.aarch64.neon.vsli.v2i64 (see CHECK lines above).
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}
3740
3741 // CHECK-LABEL: @test_vmax_s8(
3742 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
3743 // CHECK: ret <8 x i8> [[VMAX_I]]
// Verifies vmax_s8 lowers to @llvm.aarch64.neon.smax.v8i8 (see CHECK lines above).
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}
3747
3748 // CHECK-LABEL: @test_vmax_s16(
3749 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3750 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3751 // CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
3752 // CHECK: ret <4 x i16> [[VMAX2_I]]
// Verifies vmax_s16 lowers to @llvm.aarch64.neon.smax.v4i16 (see CHECK lines above).
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}
3756
3757 // CHECK-LABEL: @test_vmax_s32(
3758 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3759 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3760 // CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
3761 // CHECK: ret <2 x i32> [[VMAX2_I]]
// Verifies vmax_s32 lowers to @llvm.aarch64.neon.smax.v2i32 (see CHECK lines above).
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}
3765
3766 // CHECK-LABEL: @test_vmax_u8(
3767 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
3768 // CHECK: ret <8 x i8> [[VMAX_I]]
// Verifies vmax_u8 lowers to @llvm.aarch64.neon.umax.v8i8 (see CHECK lines above).
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}
3772
3773 // CHECK-LABEL: @test_vmax_u16(
3774 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3775 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3776 // CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
3777 // CHECK: ret <4 x i16> [[VMAX2_I]]
// Verifies vmax_u16 lowers to @llvm.aarch64.neon.umax.v4i16 (see CHECK lines above).
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}
3781
3782 // CHECK-LABEL: @test_vmax_u32(
3783 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3784 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3785 // CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
3786 // CHECK: ret <2 x i32> [[VMAX2_I]]
// Verifies vmax_u32 lowers to @llvm.aarch64.neon.umax.v2i32 (see CHECK lines above).
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}
3790
3791 // CHECK-LABEL: @test_vmax_f32(
3792 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3793 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
3794 // CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
3795 // CHECK: ret <2 x float> [[VMAX2_I]]
// Verifies vmax_f32 lowers to @llvm.aarch64.neon.fmax.v2f32 (see CHECK lines above).
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
3799
3800 // CHECK-LABEL: @test_vmaxq_s8(
3801 // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
3802 // CHECK: ret <16 x i8> [[VMAX_I]]
// Verifies vmaxq_s8 lowers to @llvm.aarch64.neon.smax.v16i8 (see CHECK lines above).
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}
3806
3807 // CHECK-LABEL: @test_vmaxq_s16(
3808 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3809 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3810 // CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
3811 // CHECK: ret <8 x i16> [[VMAX2_I]]
// Verifies vmaxq_s16 lowers to @llvm.aarch64.neon.smax.v8i16 (see CHECK lines above).
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}
3815
3816 // CHECK-LABEL: @test_vmaxq_s32(
3817 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3818 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3819 // CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
3820 // CHECK: ret <4 x i32> [[VMAX2_I]]
// Verifies vmaxq_s32 lowers to @llvm.aarch64.neon.smax.v4i32 (see CHECK lines above).
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}
3824
3825 // CHECK-LABEL: @test_vmaxq_u8(
3826 // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
3827 // CHECK: ret <16 x i8> [[VMAX_I]]
// Verifies vmaxq_u8 lowers to @llvm.aarch64.neon.umax.v16i8 (see CHECK lines above).
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}
3831
3832 // CHECK-LABEL: @test_vmaxq_u16(
3833 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3834 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3835 // CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
3836 // CHECK: ret <8 x i16> [[VMAX2_I]]
// Verifies vmaxq_u16 lowers to @llvm.aarch64.neon.umax.v8i16 (see CHECK lines above).
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}
3840
3841 // CHECK-LABEL: @test_vmaxq_u32(
3842 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3843 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3844 // CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
3845 // CHECK: ret <4 x i32> [[VMAX2_I]]
// Verifies vmaxq_u32 lowers to @llvm.aarch64.neon.umax.v4i32 (see CHECK lines above).
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}
3849
3850 // CHECK-LABEL: @test_vmaxq_f32(
3851 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3852 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
3853 // CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
3854 // CHECK: ret <4 x float> [[VMAX2_I]]
// Verifies vmaxq_f32 lowers to @llvm.aarch64.neon.fmax.v4f32 (see CHECK lines above).
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}
3858
3859 // CHECK-LABEL: @test_vmaxq_f64(
3860 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
3861 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
3862 // CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
3863 // CHECK: ret <2 x double> [[VMAX2_I]]
// Verifies vmaxq_f64 (AArch64-only double-precision variant) lowers to @llvm.aarch64.neon.fmax.v2f64 (see CHECK lines above).
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}
3867
3868 // CHECK-LABEL: @test_vmin_s8(
3869 // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
3870 // CHECK: ret <8 x i8> [[VMIN_I]]
// Verifies vmin_s8 lowers to @llvm.aarch64.neon.smin.v8i8 (see CHECK lines above).
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}
3874
3875 // CHECK-LABEL: @test_vmin_s16(
3876 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3877 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3878 // CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
3879 // CHECK: ret <4 x i16> [[VMIN2_I]]
// Verifies vmin_s16 lowers to @llvm.aarch64.neon.smin.v4i16 (see CHECK lines above).
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}
3883
3884 // CHECK-LABEL: @test_vmin_s32(
3885 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3886 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3887 // CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
3888 // CHECK: ret <2 x i32> [[VMIN2_I]]
// Verifies vmin_s32 lowers to @llvm.aarch64.neon.smin.v2i32 (see CHECK lines above).
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}
3892
3893 // CHECK-LABEL: @test_vmin_u8(
3894 // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
3895 // CHECK: ret <8 x i8> [[VMIN_I]]
// Verifies vmin_u8 lowers to @llvm.aarch64.neon.umin.v8i8 (see CHECK lines above).
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}
3899
3900 // CHECK-LABEL: @test_vmin_u16(
3901 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3902 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3903 // CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
3904 // CHECK: ret <4 x i16> [[VMIN2_I]]
// Verifies vmin_u16 lowers to @llvm.aarch64.neon.umin.v4i16 (see CHECK lines above).
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}
3908
3909 // CHECK-LABEL: @test_vmin_u32(
3910 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3911 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3912 // CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
3913 // CHECK: ret <2 x i32> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vmin_u32 to @llvm.aarch64.neon.umin.v2i32.
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}
3917
3918 // CHECK-LABEL: @test_vmin_f32(
3919 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3920 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
3921 // CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
3922 // CHECK: ret <2 x float> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vmin_f32 to @llvm.aarch64.neon.fmin.v2f32.
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}
3926
3927 // CHECK-LABEL: @test_vminq_s8(
3928 // CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
3929 // CHECK: ret <16 x i8> [[VMIN_I]]
// Lowering check: the CHECK lines above pin vminq_s8 to @llvm.aarch64.neon.smin.v16i8.
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}
3933
3934 // CHECK-LABEL: @test_vminq_s16(
3935 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3936 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3937 // CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
3938 // CHECK: ret <8 x i16> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vminq_s16 to @llvm.aarch64.neon.smin.v8i16.
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}
3942
3943 // CHECK-LABEL: @test_vminq_s32(
3944 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3945 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3946 // CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
3947 // CHECK: ret <4 x i32> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vminq_s32 to @llvm.aarch64.neon.smin.v4i32.
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}
3951
3952 // CHECK-LABEL: @test_vminq_u8(
3953 // CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
3954 // CHECK: ret <16 x i8> [[VMIN_I]]
// Lowering check: the CHECK lines above pin vminq_u8 to @llvm.aarch64.neon.umin.v16i8.
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}
3958
3959 // CHECK-LABEL: @test_vminq_u16(
3960 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3961 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3962 // CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
3963 // CHECK: ret <8 x i16> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vminq_u16 to @llvm.aarch64.neon.umin.v8i16.
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}
3967
3968 // CHECK-LABEL: @test_vminq_u32(
3969 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3970 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3971 // CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
3972 // CHECK: ret <4 x i32> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vminq_u32 to @llvm.aarch64.neon.umin.v4i32.
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}
3976
3977 // CHECK-LABEL: @test_vminq_f32(
3978 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3979 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
3980 // CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
3981 // CHECK: ret <4 x float> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vminq_f32 to @llvm.aarch64.neon.fmin.v4f32.
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}
3985
3986 // CHECK-LABEL: @test_vminq_f64(
3987 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
3988 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
3989 // CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
3990 // CHECK: ret <2 x double> [[VMIN2_I]]
// Lowering check: the CHECK lines above pin vminq_f64 to @llvm.aarch64.neon.fmin.v2f64.
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}
3994
3995 // CHECK-LABEL: @test_vmaxnm_f32(
3996 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3997 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
3998 // CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
3999 // CHECK: ret <2 x float> [[VMAXNM2_I]]
// Lowering check: the CHECK lines above pin vmaxnm_f32 to @llvm.aarch64.neon.fmaxnm.v2f32.
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}
4003
4004 // CHECK-LABEL: @test_vmaxnmq_f32(
4005 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4006 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4007 // CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
4008 // CHECK: ret <4 x float> [[VMAXNM2_I]]
// Lowering check: the CHECK lines above pin vmaxnmq_f32 to @llvm.aarch64.neon.fmaxnm.v4f32.
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}
4012
4013 // CHECK-LABEL: @test_vmaxnmq_f64(
4014 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4015 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4016 // CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
4017 // CHECK: ret <2 x double> [[VMAXNM2_I]]
// Lowering check: the CHECK lines above pin vmaxnmq_f64 to @llvm.aarch64.neon.fmaxnm.v2f64.
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}
4021
4022 // CHECK-LABEL: @test_vminnm_f32(
4023 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4024 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4025 // CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
4026 // CHECK: ret <2 x float> [[VMINNM2_I]]
// Lowering check: the CHECK lines above pin vminnm_f32 to @llvm.aarch64.neon.fminnm.v2f32.
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}
4030
4031 // CHECK-LABEL: @test_vminnmq_f32(
4032 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4033 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4034 // CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
4035 // CHECK: ret <4 x float> [[VMINNM2_I]]
// Lowering check: the CHECK lines above pin vminnmq_f32 to @llvm.aarch64.neon.fminnm.v4f32.
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}
4039
4040 // CHECK-LABEL: @test_vminnmq_f64(
4041 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4042 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4043 // CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
4044 // CHECK: ret <2 x double> [[VMINNM2_I]]
// Lowering check: the CHECK lines above pin vminnmq_f64 to @llvm.aarch64.neon.fminnm.v2f64.
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
4048
4049 // CHECK-LABEL: @test_vpmax_s8(
4050 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
4051 // CHECK: ret <8 x i8> [[VPMAX_I]]
// Lowering check: the CHECK lines above pin vpmax_s8 to @llvm.aarch64.neon.smaxp.v8i8.
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}
4055
4056 // CHECK-LABEL: @test_vpmax_s16(
4057 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4058 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4059 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
4060 // CHECK: ret <4 x i16> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmax_s16 to @llvm.aarch64.neon.smaxp.v4i16.
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}
4064
4065 // CHECK-LABEL: @test_vpmax_s32(
4066 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4067 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4068 // CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
4069 // CHECK: ret <2 x i32> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmax_s32 to @llvm.aarch64.neon.smaxp.v2i32.
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}
4073
4074 // CHECK-LABEL: @test_vpmax_u8(
4075 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
4076 // CHECK: ret <8 x i8> [[VPMAX_I]]
// Lowering check: the CHECK lines above pin vpmax_u8 to @llvm.aarch64.neon.umaxp.v8i8.
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}
4080
4081 // CHECK-LABEL: @test_vpmax_u16(
4082 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4083 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4084 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
4085 // CHECK: ret <4 x i16> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmax_u16 to @llvm.aarch64.neon.umaxp.v4i16.
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}
4089
4090 // CHECK-LABEL: @test_vpmax_u32(
4091 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4092 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4093 // CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
4094 // CHECK: ret <2 x i32> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmax_u32 to @llvm.aarch64.neon.umaxp.v2i32.
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}
4098
4099 // CHECK-LABEL: @test_vpmax_f32(
4100 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4101 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4102 // CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
4103 // CHECK: ret <2 x float> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmax_f32 to @llvm.aarch64.neon.fmaxp.v2f32.
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}
4107
4108 // CHECK-LABEL: @test_vpmaxq_s8(
4109 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
4110 // CHECK: ret <16 x i8> [[VPMAX_I]]
// Lowering check: the CHECK lines above pin vpmaxq_s8 to @llvm.aarch64.neon.smaxp.v16i8.
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}
4114
4115 // CHECK-LABEL: @test_vpmaxq_s16(
4116 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4117 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4118 // CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
4119 // CHECK: ret <8 x i16> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmaxq_s16 to @llvm.aarch64.neon.smaxp.v8i16.
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}
4123
4124 // CHECK-LABEL: @test_vpmaxq_s32(
4125 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4126 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4127 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
4128 // CHECK: ret <4 x i32> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmaxq_s32 to @llvm.aarch64.neon.smaxp.v4i32.
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}
4132
4133 // CHECK-LABEL: @test_vpmaxq_u8(
4134 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
4135 // CHECK: ret <16 x i8> [[VPMAX_I]]
// Lowering check: the CHECK lines above pin vpmaxq_u8 to @llvm.aarch64.neon.umaxp.v16i8.
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}
4139
4140 // CHECK-LABEL: @test_vpmaxq_u16(
4141 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4142 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4143 // CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
4144 // CHECK: ret <8 x i16> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmaxq_u16 to @llvm.aarch64.neon.umaxp.v8i16.
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}
4148
4149 // CHECK-LABEL: @test_vpmaxq_u32(
4150 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4151 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4152 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
4153 // CHECK: ret <4 x i32> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmaxq_u32 to @llvm.aarch64.neon.umaxp.v4i32.
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}
4157
4158 // CHECK-LABEL: @test_vpmaxq_f32(
4159 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4160 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4161 // CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
4162 // CHECK: ret <4 x float> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmaxq_f32 to @llvm.aarch64.neon.fmaxp.v4f32.
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}
4166
4167 // CHECK-LABEL: @test_vpmaxq_f64(
4168 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4169 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4170 // CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
4171 // CHECK: ret <2 x double> [[VPMAX2_I]]
// Lowering check: the CHECK lines above pin vpmaxq_f64 to @llvm.aarch64.neon.fmaxp.v2f64.
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}
4175
4176 // CHECK-LABEL: @test_vpmin_s8(
4177 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
4178 // CHECK: ret <8 x i8> [[VPMIN_I]]
// Lowering check: the CHECK lines above pin vpmin_s8 to @llvm.aarch64.neon.sminp.v8i8.
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}
4182
4183 // CHECK-LABEL: @test_vpmin_s16(
4184 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4185 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4186 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
4187 // CHECK: ret <4 x i16> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpmin_s16 to @llvm.aarch64.neon.sminp.v4i16.
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}
4191
4192 // CHECK-LABEL: @test_vpmin_s32(
4193 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4194 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4195 // CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
4196 // CHECK: ret <2 x i32> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpmin_s32 to @llvm.aarch64.neon.sminp.v2i32.
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}
4200
4201 // CHECK-LABEL: @test_vpmin_u8(
4202 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
4203 // CHECK: ret <8 x i8> [[VPMIN_I]]
// Lowering check: the CHECK lines above pin vpmin_u8 to @llvm.aarch64.neon.uminp.v8i8.
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}
4207
4208 // CHECK-LABEL: @test_vpmin_u16(
4209 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4210 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4211 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
4212 // CHECK: ret <4 x i16> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpmin_u16 to @llvm.aarch64.neon.uminp.v4i16.
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}
4216
4217 // CHECK-LABEL: @test_vpmin_u32(
4218 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4219 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4220 // CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
4221 // CHECK: ret <2 x i32> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpmin_u32 to @llvm.aarch64.neon.uminp.v2i32.
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}
4225
4226 // CHECK-LABEL: @test_vpmin_f32(
4227 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4228 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4229 // CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
4230 // CHECK: ret <2 x float> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpmin_f32 to @llvm.aarch64.neon.fminp.v2f32.
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}
4234
4235 // CHECK-LABEL: @test_vpminq_s8(
4236 // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
4237 // CHECK: ret <16 x i8> [[VPMIN_I]]
// Lowering check: the CHECK lines above pin vpminq_s8 to @llvm.aarch64.neon.sminp.v16i8.
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}
4241
4242 // CHECK-LABEL: @test_vpminq_s16(
4243 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4244 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4245 // CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
4246 // CHECK: ret <8 x i16> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpminq_s16 to @llvm.aarch64.neon.sminp.v8i16.
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}
4250
4251 // CHECK-LABEL: @test_vpminq_s32(
4252 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4253 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4254 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
4255 // CHECK: ret <4 x i32> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpminq_s32 to @llvm.aarch64.neon.sminp.v4i32.
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}
4259
4260 // CHECK-LABEL: @test_vpminq_u8(
4261 // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
4262 // CHECK: ret <16 x i8> [[VPMIN_I]]
// Lowering check: the CHECK lines above pin vpminq_u8 to @llvm.aarch64.neon.uminp.v16i8.
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}
4266
4267 // CHECK-LABEL: @test_vpminq_u16(
4268 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4270 // CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
4271 // CHECK: ret <8 x i16> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpminq_u16 to @llvm.aarch64.neon.uminp.v8i16.
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}
4275
4276 // CHECK-LABEL: @test_vpminq_u32(
4277 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4278 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4279 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
4280 // CHECK: ret <4 x i32> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpminq_u32 to @llvm.aarch64.neon.uminp.v4i32.
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}
4284
4285 // CHECK-LABEL: @test_vpminq_f32(
4286 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4287 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4288 // CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
4289 // CHECK: ret <4 x float> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpminq_f32 to @llvm.aarch64.neon.fminp.v4f32.
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}
4293
4294 // CHECK-LABEL: @test_vpminq_f64(
4295 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4296 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4297 // CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
4298 // CHECK: ret <2 x double> [[VPMIN2_I]]
// Lowering check: the CHECK lines above pin vpminq_f64 to @llvm.aarch64.neon.fminp.v2f64.
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}
4302
4303 // CHECK-LABEL: @test_vpmaxnm_f32(
4304 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4305 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4306 // CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
4307 // CHECK: ret <2 x float> [[VPMAXNM2_I]]
// Lowering check: the CHECK lines above pin vpmaxnm_f32 to @llvm.aarch64.neon.fmaxnmp.v2f32.
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}
4311
4312 // CHECK-LABEL: @test_vpmaxnmq_f32(
4313 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4314 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4315 // CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
4316 // CHECK: ret <4 x float> [[VPMAXNM2_I]]
// Lowering check: the CHECK lines above pin vpmaxnmq_f32 to @llvm.aarch64.neon.fmaxnmp.v4f32.
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}
4320
4321 // CHECK-LABEL: @test_vpmaxnmq_f64(
4322 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4323 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4324 // CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
4325 // CHECK: ret <2 x double> [[VPMAXNM2_I]]
// Lowering check: the CHECK lines above pin vpmaxnmq_f64 to @llvm.aarch64.neon.fmaxnmp.v2f64.
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}
4329
4330 // CHECK-LABEL: @test_vpminnm_f32(
4331 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4332 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4333 // CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
4334 // CHECK: ret <2 x float> [[VPMINNM2_I]]
// Lowering check: the CHECK lines above pin vpminnm_f32 to @llvm.aarch64.neon.fminnmp.v2f32.
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}
4338
4339 // CHECK-LABEL: @test_vpminnmq_f32(
4340 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4341 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4342 // CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
4343 // CHECK: ret <4 x float> [[VPMINNM2_I]]
// Lowering check: the CHECK lines above pin vpminnmq_f32 to @llvm.aarch64.neon.fminnmp.v4f32.
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}
4347
4348 // CHECK-LABEL: @test_vpminnmq_f64(
4349 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4350 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4351 // CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
4352 // CHECK: ret <2 x double> [[VPMINNM2_I]]
// Lowering check: the CHECK lines above pin vpminnmq_f64 to @llvm.aarch64.neon.fminnmp.v2f64.
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}
4356
4357 // CHECK-LABEL: @test_vpadd_s8(
4358 // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
4359 // CHECK: ret <8 x i8> [[VPADD_V_I]]
// Lowering check: the CHECK lines above pin vpadd_s8 to @llvm.aarch64.neon.addp.v8i8.
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}
4363
4364 // CHECK-LABEL: @test_vpadd_s16(
4365 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4366 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4367 // CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
4368 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4369 // CHECK: ret <4 x i16> [[VPADD_V2_I]]
// Lowering check: the CHECK lines above pin vpadd_s16 to @llvm.aarch64.neon.addp.v4i16.
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}
4373
4374 // CHECK-LABEL: @test_vpadd_s32(
4375 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4376 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4377 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
4378 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4379 // CHECK: ret <2 x i32> [[VPADD_V2_I]]
// Lowering check: the CHECK lines above pin vpadd_s32 to @llvm.aarch64.neon.addp.v2i32.
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}
4383
4384 // CHECK-LABEL: @test_vpadd_u8(
4385 // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
4386 // CHECK: ret <8 x i8> [[VPADD_V_I]]
// Lowering check: the CHECK lines above pin vpadd_u8 to @llvm.aarch64.neon.addp.v8i8.
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}
4390
4391 // CHECK-LABEL: @test_vpadd_u16(
4392 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4393 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4394 // CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
4395 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4396 // CHECK: ret <4 x i16> [[VPADD_V2_I]]
// Lowering check: the CHECK lines above pin vpadd_u16 to @llvm.aarch64.neon.addp.v4i16.
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}
4400
4401 // CHECK-LABEL: @test_vpadd_u32(
4402 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4403 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4404 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
4405 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4406 // CHECK: ret <2 x i32> [[VPADD_V2_I]]
// Lowering check: the CHECK lines above pin vpadd_u32 to @llvm.aarch64.neon.addp.v2i32.
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}
4410
4411 // CHECK-LABEL: @test_vpadd_f32(
4412 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4413 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4414 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
4415 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
4416 // CHECK: ret <2 x float> [[VPADD_V2_I]]
// Lowering check: the CHECK lines above pin vpadd_f32 to @llvm.aarch64.neon.faddp.v2f32.
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}
4420
4421 // CHECK-LABEL: @test_vpaddq_s8(
4422 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
4423 // CHECK: ret <16 x i8> [[VPADDQ_V_I]]
// Lowering check: the CHECK lines above pin vpaddq_s8 to @llvm.aarch64.neon.addp.v16i8.
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
  return vpaddq_s8(a, b);
}
4427
4428 // CHECK-LABEL: @test_vpaddq_s16(
4429 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4430 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4431 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
4432 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
4433 // CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
// Lowering check: the CHECK lines above pin vpaddq_s16 to @llvm.aarch64.neon.addp.v8i16.
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  return vpaddq_s16(a, b);
}
4437
4438 // CHECK-LABEL: @test_vpaddq_s32(
4439 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4440 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4441 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
4442 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
4443 // CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
// Lowering check: the CHECK lines above pin vpaddq_s32 to @llvm.aarch64.neon.addp.v4i32.
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  return vpaddq_s32(a, b);
}
4447
4448 // CHECK-LABEL: @test_vpaddq_u8(
4449 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
4450 // CHECK: ret <16 x i8> [[VPADDQ_V_I]]
// Lowering check: the CHECK lines above pin vpaddq_u8 to @llvm.aarch64.neon.addp.v16i8.
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vpaddq_u8(a, b);
}
4454
4455 // CHECK-LABEL: @test_vpaddq_u16(
4456 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4457 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4458 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
4459 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
4460 // CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
// Lowering check: the CHECK lines above pin vpaddq_u16 to @llvm.aarch64.neon.addp.v8i16.
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vpaddq_u16(a, b);
}
4464
4465 // CHECK-LABEL: @test_vpaddq_u32(
4466 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4467 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4468 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
4469 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
4470 // CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
// Lowering check: the CHECK lines above pin vpaddq_u32 to @llvm.aarch64.neon.addp.v4i32.
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vpaddq_u32(a, b);
}
4474
4475 // CHECK-LABEL: @test_vpaddq_f32(
4476 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4477 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4478 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
4479 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
4480 // CHECK: ret <4 x float> [[VPADDQ_V2_I]]
// Lowering check: the CHECK lines above pin vpaddq_f32 to @llvm.aarch64.neon.faddp.v4f32.
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  return vpaddq_f32(a, b);
}
4484
4485 // CHECK-LABEL: @test_vpaddq_f64(
4486 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4487 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4488 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
4489 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
4490 // CHECK: ret <2 x double> [[VPADDQ_V2_I]]
// Lowering check: the CHECK lines above pin vpaddq_f64 to @llvm.aarch64.neon.faddp.v2f64.
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
  return vpaddq_f64(a, b);
}
4494
4495 // CHECK-LABEL: @test_vqdmulh_s16(
4496 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4497 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4498 // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
4499 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
4500 // CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
// Lowering check: the CHECK lines above pin vqdmulh_s16 to @llvm.aarch64.neon.sqdmulh.v4i16.
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}
4504
4505 // CHECK-LABEL: @test_vqdmulh_s32(
4506 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4507 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4508 // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
4509 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
4510 // CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
// Lowering check: the CHECK lines above pin vqdmulh_s32 to @llvm.aarch64.neon.sqdmulh.v2i32.
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}
4514
4515 // CHECK-LABEL: @test_vqdmulhq_s16(
4516 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4517 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4518 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
4519 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
4520 // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
// Lowering check: the CHECK lines above pin vqdmulhq_s16 to @llvm.aarch64.neon.sqdmulh.v8i16.
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}
4524
4525 // CHECK-LABEL: @test_vqdmulhq_s32(
4526 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4527 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4528 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
4529 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
4530 // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s32(int32x4_t a,int32x4_t b)4531 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
4532 return vqdmulhq_s32(a, b);
4533 }
4534
// vqrdmulh (signed saturating rounding doubling multiply, high half): must
// lower to llvm.aarch64.neon.sqrdmulh for s16/s32 elements in both 64- and
// 128-bit vector widths.
4535 // CHECK-LABEL: @test_vqrdmulh_s16(
4536 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4537 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4538 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
4539 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
4540 // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
test_vqrdmulh_s16(int16x4_t a,int16x4_t b)4541 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
4542   return vqrdmulh_s16(a, b);
4543 }
4544
4545 // CHECK-LABEL: @test_vqrdmulh_s32(
4546 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4547 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4548 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
4549 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
4550 // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
test_vqrdmulh_s32(int32x2_t a,int32x2_t b)4551 int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
4552   return vqrdmulh_s32(a, b);
4553 }
4554
4555 // CHECK-LABEL: @test_vqrdmulhq_s16(
4556 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4557 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4558 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
4559 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
4560 // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s16(int16x8_t a,int16x8_t b)4561 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
4562   return vqrdmulhq_s16(a, b);
4563 }
4564
4565 // CHECK-LABEL: @test_vqrdmulhq_s32(
4566 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4567 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4568 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
4569 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
4570 // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s32(int32x4_t a,int32x4_t b)4571 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
4572   return vqrdmulhq_s32(a, b);
4573 }
4574
// vmulx (floating-point multiply extended): must lower to
// llvm.aarch64.neon.fmulx for 2xf32, 4xf32, and 2xf64 vectors.
4575 // CHECK-LABEL: @test_vmulx_f32(
4576 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4577 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4578 // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
4579 // CHECK: ret <2 x float> [[VMULX2_I]]
test_vmulx_f32(float32x2_t a,float32x2_t b)4580 float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
4581   return vmulx_f32(a, b);
4582 }
4583
4584 // CHECK-LABEL: @test_vmulxq_f32(
4585 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4586 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4587 // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
4588 // CHECK: ret <4 x float> [[VMULX2_I]]
test_vmulxq_f32(float32x4_t a,float32x4_t b)4589 float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
4590   return vmulxq_f32(a, b);
4591 }
4592
4593 // CHECK-LABEL: @test_vmulxq_f64(
4594 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4595 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4596 // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
4597 // CHECK: ret <2 x double> [[VMULX2_I]]
test_vmulxq_f64(float64x2_t a,float64x2_t b)4598 float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
4599   return vmulxq_f64(a, b);
4600 }
4601
// vshl_n / vshlq_n, signed element types: shift-left by immediate 3 must
// lower to a plain IR `shl` with a splat constant vector (no intrinsic call).
4602 // CHECK-LABEL: @test_vshl_n_s8(
4603 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4604 // CHECK: ret <8 x i8> [[VSHL_N]]
test_vshl_n_s8(int8x8_t a)4605 int8x8_t test_vshl_n_s8(int8x8_t a) {
4606   return vshl_n_s8(a, 3);
4607 }
4608
4609 // CHECK-LABEL: @test_vshl_n_s16(
4610 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4611 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4612 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4613 // CHECK: ret <4 x i16> [[VSHL_N]]
test_vshl_n_s16(int16x4_t a)4614 int16x4_t test_vshl_n_s16(int16x4_t a) {
4615   return vshl_n_s16(a, 3);
4616 }
4617
4618 // CHECK-LABEL: @test_vshl_n_s32(
4619 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4620 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4621 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4622 // CHECK: ret <2 x i32> [[VSHL_N]]
test_vshl_n_s32(int32x2_t a)4623 int32x2_t test_vshl_n_s32(int32x2_t a) {
4624   return vshl_n_s32(a, 3);
4625 }
4626
4627 // CHECK-LABEL: @test_vshlq_n_s8(
4628 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4629 // CHECK: ret <16 x i8> [[VSHL_N]]
test_vshlq_n_s8(int8x16_t a)4630 int8x16_t test_vshlq_n_s8(int8x16_t a) {
4631   return vshlq_n_s8(a, 3);
4632 }
4633
4634 // CHECK-LABEL: @test_vshlq_n_s16(
4635 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4636 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4637 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4638 // CHECK: ret <8 x i16> [[VSHL_N]]
test_vshlq_n_s16(int16x8_t a)4639 int16x8_t test_vshlq_n_s16(int16x8_t a) {
4640   return vshlq_n_s16(a, 3);
4641 }
4642
4643 // CHECK-LABEL: @test_vshlq_n_s32(
4644 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4645 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4646 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4647 // CHECK: ret <4 x i32> [[VSHL_N]]
test_vshlq_n_s32(int32x4_t a)4648 int32x4_t test_vshlq_n_s32(int32x4_t a) {
4649   return vshlq_n_s32(a, 3);
4650 }
4651
4652 // CHECK-LABEL: @test_vshlq_n_s64(
4653 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4654 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4655 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4656 // CHECK: ret <2 x i64> [[VSHL_N]]
test_vshlq_n_s64(int64x2_t a)4657 int64x2_t test_vshlq_n_s64(int64x2_t a) {
4658   return vshlq_n_s64(a, 3);
4659 }
4660
// vshl_n / vshlq_n, unsigned element types: same lowering as the signed
// cases — a plain `shl` with a splat immediate (shift-left is sign-agnostic).
4661 // CHECK-LABEL: @test_vshl_n_u8(
4662 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4663 // CHECK: ret <8 x i8> [[VSHL_N]]
test_vshl_n_u8(uint8x8_t a)4664 uint8x8_t test_vshl_n_u8(uint8x8_t a) {
4665   return vshl_n_u8(a, 3);
4666 }
4667
4668 // CHECK-LABEL: @test_vshl_n_u16(
4669 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4670 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4671 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4672 // CHECK: ret <4 x i16> [[VSHL_N]]
test_vshl_n_u16(uint16x4_t a)4673 uint16x4_t test_vshl_n_u16(uint16x4_t a) {
4674   return vshl_n_u16(a, 3);
4675 }
4676
4677 // CHECK-LABEL: @test_vshl_n_u32(
4678 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4679 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4680 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4681 // CHECK: ret <2 x i32> [[VSHL_N]]
test_vshl_n_u32(uint32x2_t a)4682 uint32x2_t test_vshl_n_u32(uint32x2_t a) {
4683   return vshl_n_u32(a, 3);
4684 }
4685
4686 // CHECK-LABEL: @test_vshlq_n_u8(
4687 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4688 // CHECK: ret <16 x i8> [[VSHL_N]]
test_vshlq_n_u8(uint8x16_t a)4689 uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
4690   return vshlq_n_u8(a, 3);
4691 }
4692
4693 // CHECK-LABEL: @test_vshlq_n_u16(
4694 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4695 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4696 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4697 // CHECK: ret <8 x i16> [[VSHL_N]]
test_vshlq_n_u16(uint16x8_t a)4698 uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
4699   return vshlq_n_u16(a, 3);
4700 }
4701
4702 // CHECK-LABEL: @test_vshlq_n_u32(
4703 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4704 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4705 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4706 // CHECK: ret <4 x i32> [[VSHL_N]]
test_vshlq_n_u32(uint32x4_t a)4707 uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
4708   return vshlq_n_u32(a, 3);
4709 }
4710
4711 // CHECK-LABEL: @test_vshlq_n_u64(
4712 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4713 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4714 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4715 // CHECK: ret <2 x i64> [[VSHL_N]]
test_vshlq_n_u64(uint64x2_t a)4716 uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
4717   return vshlq_n_u64(a, 3);
4718 }
4719
// vshr_n / vshrq_n, signed element types: shift-right by immediate 3 must
// lower to an arithmetic `ashr` (sign-extending) with a splat constant.
4720 // CHECK-LABEL: @test_vshr_n_s8(
4721 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4722 // CHECK: ret <8 x i8> [[VSHR_N]]
test_vshr_n_s8(int8x8_t a)4723 int8x8_t test_vshr_n_s8(int8x8_t a) {
4724   return vshr_n_s8(a, 3);
4725 }
4726
4727 // CHECK-LABEL: @test_vshr_n_s16(
4728 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4729 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4730 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4731 // CHECK: ret <4 x i16> [[VSHR_N]]
test_vshr_n_s16(int16x4_t a)4732 int16x4_t test_vshr_n_s16(int16x4_t a) {
4733   return vshr_n_s16(a, 3);
4734 }
4735
4736 // CHECK-LABEL: @test_vshr_n_s32(
4737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4738 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4739 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
4740 // CHECK: ret <2 x i32> [[VSHR_N]]
test_vshr_n_s32(int32x2_t a)4741 int32x2_t test_vshr_n_s32(int32x2_t a) {
4742   return vshr_n_s32(a, 3);
4743 }
4744
4745 // CHECK-LABEL: @test_vshrq_n_s8(
4746 // CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4747 // CHECK: ret <16 x i8> [[VSHR_N]]
test_vshrq_n_s8(int8x16_t a)4748 int8x16_t test_vshrq_n_s8(int8x16_t a) {
4749   return vshrq_n_s8(a, 3);
4750 }
4751
4752 // CHECK-LABEL: @test_vshrq_n_s16(
4753 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4754 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4755 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4756 // CHECK: ret <8 x i16> [[VSHR_N]]
test_vshrq_n_s16(int16x8_t a)4757 int16x8_t test_vshrq_n_s16(int16x8_t a) {
4758   return vshrq_n_s16(a, 3);
4759 }
4760
4761 // CHECK-LABEL: @test_vshrq_n_s32(
4762 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4763 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4764 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4765 // CHECK: ret <4 x i32> [[VSHR_N]]
test_vshrq_n_s32(int32x4_t a)4766 int32x4_t test_vshrq_n_s32(int32x4_t a) {
4767   return vshrq_n_s32(a, 3);
4768 }
4769
4770 // CHECK-LABEL: @test_vshrq_n_s64(
4771 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4772 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4773 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
4774 // CHECK: ret <2 x i64> [[VSHR_N]]
test_vshrq_n_s64(int64x2_t a)4775 int64x2_t test_vshrq_n_s64(int64x2_t a) {
4776   return vshrq_n_s64(a, 3);
4777 }
4778
// vshr_n / vshrq_n, unsigned element types: here the lowering must be a
// logical `lshr` (zero-extending), in contrast to `ashr` for the signed set.
4779 // CHECK-LABEL: @test_vshr_n_u8(
4780 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4781 // CHECK: ret <8 x i8> [[VSHR_N]]
test_vshr_n_u8(uint8x8_t a)4782 uint8x8_t test_vshr_n_u8(uint8x8_t a) {
4783   return vshr_n_u8(a, 3);
4784 }
4785
4786 // CHECK-LABEL: @test_vshr_n_u16(
4787 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4788 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4789 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4790 // CHECK: ret <4 x i16> [[VSHR_N]]
test_vshr_n_u16(uint16x4_t a)4791 uint16x4_t test_vshr_n_u16(uint16x4_t a) {
4792   return vshr_n_u16(a, 3);
4793 }
4794
4795 // CHECK-LABEL: @test_vshr_n_u32(
4796 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4797 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4798 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
4799 // CHECK: ret <2 x i32> [[VSHR_N]]
test_vshr_n_u32(uint32x2_t a)4800 uint32x2_t test_vshr_n_u32(uint32x2_t a) {
4801   return vshr_n_u32(a, 3);
4802 }
4803
4804 // CHECK-LABEL: @test_vshrq_n_u8(
4805 // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4806 // CHECK: ret <16 x i8> [[VSHR_N]]
test_vshrq_n_u8(uint8x16_t a)4807 uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
4808   return vshrq_n_u8(a, 3);
4809 }
4810
4811 // CHECK-LABEL: @test_vshrq_n_u16(
4812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4813 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4814 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4815 // CHECK: ret <8 x i16> [[VSHR_N]]
test_vshrq_n_u16(uint16x8_t a)4816 uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
4817   return vshrq_n_u16(a, 3);
4818 }
4819
4820 // CHECK-LABEL: @test_vshrq_n_u32(
4821 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4822 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4823 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4824 // CHECK: ret <4 x i32> [[VSHR_N]]
test_vshrq_n_u32(uint32x4_t a)4825 uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
4826   return vshrq_n_u32(a, 3);
4827 }
4828
4829 // CHECK-LABEL: @test_vshrq_n_u64(
4830 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4831 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4832 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
4833 // CHECK: ret <2 x i64> [[VSHR_N]]
test_vshrq_n_u64(uint64x2_t a)4834 uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
4835   return vshrq_n_u64(a, 3);
4836 }
4837
// vsra_n / vsraq_n, signed: shift-right-and-accumulate must lower to an
// `ashr` of %b by the immediate followed by an `add` into %a — two plain IR
// instructions, no intrinsic call.
4838 // CHECK-LABEL: @test_vsra_n_s8(
4839 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4840 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4841 // CHECK: ret <8 x i8> [[TMP0]]
test_vsra_n_s8(int8x8_t a,int8x8_t b)4842 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
4843   return vsra_n_s8(a, b, 3);
4844 }
4845
4846 // CHECK-LABEL: @test_vsra_n_s16(
4847 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4848 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4849 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4850 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4851 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4852 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4853 // CHECK: ret <4 x i16> [[TMP4]]
test_vsra_n_s16(int16x4_t a,int16x4_t b)4854 int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
4855   return vsra_n_s16(a, b, 3);
4856 }
4857
4858 // CHECK-LABEL: @test_vsra_n_s32(
4859 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4860 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4861 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4862 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4863 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
4864 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4865 // CHECK: ret <2 x i32> [[TMP4]]
test_vsra_n_s32(int32x2_t a,int32x2_t b)4866 int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
4867   return vsra_n_s32(a, b, 3);
4868 }
4869
4870 // CHECK-LABEL: @test_vsraq_n_s8(
4871 // CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4872 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4873 // CHECK: ret <16 x i8> [[TMP0]]
test_vsraq_n_s8(int8x16_t a,int8x16_t b)4874 int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
4875   return vsraq_n_s8(a, b, 3);
4876 }
4877
4878 // CHECK-LABEL: @test_vsraq_n_s16(
4879 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4880 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4881 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4882 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4883 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4884 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4885 // CHECK: ret <8 x i16> [[TMP4]]
test_vsraq_n_s16(int16x8_t a,int16x8_t b)4886 int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
4887   return vsraq_n_s16(a, b, 3);
4888 }
4889
4890 // CHECK-LABEL: @test_vsraq_n_s32(
4891 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4892 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4893 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4894 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4895 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4896 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4897 // CHECK: ret <4 x i32> [[TMP4]]
test_vsraq_n_s32(int32x4_t a,int32x4_t b)4898 int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
4899   return vsraq_n_s32(a, b, 3);
4900 }
4901
4902 // CHECK-LABEL: @test_vsraq_n_s64(
4903 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4904 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4905 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4906 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4907 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
4908 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4909 // CHECK: ret <2 x i64> [[TMP4]]
test_vsraq_n_s64(int64x2_t a,int64x2_t b)4910 int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
4911   return vsraq_n_s64(a, b, 3);
4912 }
4913
// vsra_n / vsraq_n, unsigned: shift-right-and-accumulate must lower to an
// `lshr` of %b (logical, matching the unsigned element type) plus an `add`
// into %a.
4914 // CHECK-LABEL: @test_vsra_n_u8(
4915 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4916 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4917 // CHECK: ret <8 x i8> [[TMP0]]
test_vsra_n_u8(uint8x8_t a,uint8x8_t b)4918 uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
4919   return vsra_n_u8(a, b, 3);
4920 }
4921
4922 // CHECK-LABEL: @test_vsra_n_u16(
4923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4925 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4926 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4927 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4928 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4929 // CHECK: ret <4 x i16> [[TMP4]]
test_vsra_n_u16(uint16x4_t a,uint16x4_t b)4930 uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
4931   return vsra_n_u16(a, b, 3);
4932 }
4933
4934 // CHECK-LABEL: @test_vsra_n_u32(
4935 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4936 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4937 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4938 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4939 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
4940 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4941 // CHECK: ret <2 x i32> [[TMP4]]
test_vsra_n_u32(uint32x2_t a,uint32x2_t b)4942 uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
4943   return vsra_n_u32(a, b, 3);
4944 }
4945
4946 // CHECK-LABEL: @test_vsraq_n_u8(
4947 // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4948 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4949 // CHECK: ret <16 x i8> [[TMP0]]
test_vsraq_n_u8(uint8x16_t a,uint8x16_t b)4950 uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
4951   return vsraq_n_u8(a, b, 3);
4952 }
4953
4954 // CHECK-LABEL: @test_vsraq_n_u16(
4955 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4956 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4957 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4958 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4959 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4960 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4961 // CHECK: ret <8 x i16> [[TMP4]]
test_vsraq_n_u16(uint16x8_t a,uint16x8_t b)4962 uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
4963   return vsraq_n_u16(a, b, 3);
4964 }
4965
4966 // CHECK-LABEL: @test_vsraq_n_u32(
4967 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4968 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4969 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4970 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4971 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4972 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4973 // CHECK: ret <4 x i32> [[TMP4]]
test_vsraq_n_u32(uint32x4_t a,uint32x4_t b)4974 uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
4975   return vsraq_n_u32(a, b, 3);
4976 }
4977
4978 // CHECK-LABEL: @test_vsraq_n_u64(
4979 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4980 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4981 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4982 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4983 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
4984 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4985 // CHECK: ret <2 x i64> [[TMP4]]
test_vsraq_n_u64(uint64x2_t a,uint64x2_t b)4986 uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
4987   return vsraq_n_u64(a, b, 3);
4988 }
4989
// vrshr_n / vrshrq_n, signed: rounding shift right has no plain-IR form, so
// it must lower to llvm.aarch64.neon.srshl with a NEGATED splat immediate
// (-3), i.e. a rounding shift left by a negative amount.
4990 // CHECK-LABEL: @test_vrshr_n_s8(
4991 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
4992 // CHECK: ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_s8(int8x8_t a)4993 int8x8_t test_vrshr_n_s8(int8x8_t a) {
4994   return vrshr_n_s8(a, 3);
4995 }
4996
4997 // CHECK-LABEL: @test_vrshr_n_s16(
4998 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4999 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5000 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5001 // CHECK: ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_s16(int16x4_t a)5002 int16x4_t test_vrshr_n_s16(int16x4_t a) {
5003   return vrshr_n_s16(a, 3);
5004 }
5005
5006 // CHECK-LABEL: @test_vrshr_n_s32(
5007 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5008 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5009 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5010 // CHECK: ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_s32(int32x2_t a)5011 int32x2_t test_vrshr_n_s32(int32x2_t a) {
5012   return vrshr_n_s32(a, 3);
5013 }
5014
5015 // CHECK-LABEL: @test_vrshrq_n_s8(
5016 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5017 // CHECK: ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_s8(int8x16_t a)5018 int8x16_t test_vrshrq_n_s8(int8x16_t a) {
5019   return vrshrq_n_s8(a, 3);
5020 }
5021
5022 // CHECK-LABEL: @test_vrshrq_n_s16(
5023 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5024 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5025 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5026 // CHECK: ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_s16(int16x8_t a)5027 int16x8_t test_vrshrq_n_s16(int16x8_t a) {
5028   return vrshrq_n_s16(a, 3);
5029 }
5030
5031 // CHECK-LABEL: @test_vrshrq_n_s32(
5032 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5033 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5034 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5035 // CHECK: ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_s32(int32x4_t a)5036 int32x4_t test_vrshrq_n_s32(int32x4_t a) {
5037   return vrshrq_n_s32(a, 3);
5038 }
5039
5040 // CHECK-LABEL: @test_vrshrq_n_s64(
5041 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5042 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5043 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5044 // CHECK: ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_s64(int64x2_t a)5045 int64x2_t test_vrshrq_n_s64(int64x2_t a) {
5046   return vrshrq_n_s64(a, 3);
5047 }
5048
// vrshr_n / vrshrq_n, unsigned: same negative-shift-amount lowering as the
// signed set, but through llvm.aarch64.neon.urshl (unsigned rounding shift).
5049 // CHECK-LABEL: @test_vrshr_n_u8(
5050 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5051 // CHECK: ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_u8(uint8x8_t a)5052 uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
5053   return vrshr_n_u8(a, 3);
5054 }
5055
5056 // CHECK-LABEL: @test_vrshr_n_u16(
5057 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5058 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5059 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5060 // CHECK: ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_u16(uint16x4_t a)5061 uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
5062   return vrshr_n_u16(a, 3);
5063 }
5064
5065 // CHECK-LABEL: @test_vrshr_n_u32(
5066 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5067 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5068 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5069 // CHECK: ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_u32(uint32x2_t a)5070 uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
5071   return vrshr_n_u32(a, 3);
5072 }
5073
5074 // CHECK-LABEL: @test_vrshrq_n_u8(
5075 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5076 // CHECK: ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_u8(uint8x16_t a)5077 uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
5078   return vrshrq_n_u8(a, 3);
5079 }
5080
5081 // CHECK-LABEL: @test_vrshrq_n_u16(
5082 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5083 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5084 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5085 // CHECK: ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_u16(uint16x8_t a)5086 uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
5087   return vrshrq_n_u16(a, 3);
5088 }
5089
5090 // CHECK-LABEL: @test_vrshrq_n_u32(
5091 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5092 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5093 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5094 // CHECK: ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_u32(uint32x4_t a)5095 uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
5096   return vrshrq_n_u32(a, 3);
5097 }
5098
5099 // CHECK-LABEL: @test_vrshrq_n_u64(
5100 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5101 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5102 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5103 // CHECK: ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_u64(uint64x2_t a)5104 uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
5105   return vrshrq_n_u64(a, 3);
5106 }
5107
// vrsra_n / vrsraq_n, signed: rounding shift-right-and-accumulate must lower
// to llvm.aarch64.neon.srshl on %b with a negated immediate splat, then a
// plain `add` into %a.
5108 // CHECK-LABEL: @test_vrsra_n_s8(
5109 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5110 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5111 // CHECK: ret <8 x i8> [[TMP0]]
test_vrsra_n_s8(int8x8_t a,int8x8_t b)5112 int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
5113   return vrsra_n_s8(a, b, 3);
5114 }
5115
5116 // CHECK-LABEL: @test_vrsra_n_s16(
5117 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5118 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5119 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5120 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5121 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5122 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5123 // CHECK: ret <4 x i16> [[TMP3]]
test_vrsra_n_s16(int16x4_t a,int16x4_t b)5124 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
5125   return vrsra_n_s16(a, b, 3);
5126 }
5127
5128 // CHECK-LABEL: @test_vrsra_n_s32(
5129 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5130 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5131 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5132 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5133 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5134 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5135 // CHECK: ret <2 x i32> [[TMP3]]
test_vrsra_n_s32(int32x2_t a,int32x2_t b)5136 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
5137   return vrsra_n_s32(a, b, 3);
5138 }
5139
5140 // CHECK-LABEL: @test_vrsraq_n_s8(
5141 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5142 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5143 // CHECK: ret <16 x i8> [[TMP0]]
test_vrsraq_n_s8(int8x16_t a,int8x16_t b)5144 int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
5145   return vrsraq_n_s8(a, b, 3);
5146 }
5147
5148 // CHECK-LABEL: @test_vrsraq_n_s16(
5149 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5150 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5151 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5152 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5153 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5154 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5155 // CHECK: ret <8 x i16> [[TMP3]]
test_vrsraq_n_s16(int16x8_t a,int16x8_t b)5156 int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
5157   return vrsraq_n_s16(a, b, 3);
5158 }
5159
5160 // CHECK-LABEL: @test_vrsraq_n_s32(
5161 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5162 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5163 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5165 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5166 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5167 // CHECK: ret <4 x i32> [[TMP3]]
test_vrsraq_n_s32(int32x4_t a,int32x4_t b)5168 int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
5169   return vrsraq_n_s32(a, b, 3);
5170 }
5171
5172 // CHECK-LABEL: @test_vrsraq_n_s64(
5173 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5174 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5175 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5176 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5177 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5178 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5179 // CHECK: ret <2 x i64> [[TMP3]]
test_vrsraq_n_s64(int64x2_t a,int64x2_t b)5180 int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
5181   return vrsraq_n_s64(a, b, 3);
5182 }
5183
5184 // CHECK-LABEL: @test_vrsra_n_u8(
5185 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5186 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5187 // CHECK: ret <8 x i8> [[TMP0]]
test_vrsra_n_u8(uint8x8_t a,uint8x8_t b)5188 uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
5189 return vrsra_n_u8(a, b, 3);
5190 }
5191
5192 // CHECK-LABEL: @test_vrsra_n_u16(
5193 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5194 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5195 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5196 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5197 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5198 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5199 // CHECK: ret <4 x i16> [[TMP3]]
test_vrsra_n_u16(uint16x4_t a,uint16x4_t b)5200 uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
5201 return vrsra_n_u16(a, b, 3);
5202 }
5203
5204 // CHECK-LABEL: @test_vrsra_n_u32(
5205 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5206 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5207 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5208 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5209 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5210 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5211 // CHECK: ret <2 x i32> [[TMP3]]
test_vrsra_n_u32(uint32x2_t a,uint32x2_t b)5212 uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
5213 return vrsra_n_u32(a, b, 3);
5214 }
5215
5216 // CHECK-LABEL: @test_vrsraq_n_u8(
5217 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5218 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5219 // CHECK: ret <16 x i8> [[TMP0]]
test_vrsraq_n_u8(uint8x16_t a,uint8x16_t b)5220 uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
5221 return vrsraq_n_u8(a, b, 3);
5222 }
5223
5224 // CHECK-LABEL: @test_vrsraq_n_u16(
5225 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5226 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5227 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5228 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5229 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5230 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5231 // CHECK: ret <8 x i16> [[TMP3]]
test_vrsraq_n_u16(uint16x8_t a,uint16x8_t b)5232 uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
5233 return vrsraq_n_u16(a, b, 3);
5234 }
5235
5236 // CHECK-LABEL: @test_vrsraq_n_u32(
5237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5238 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5239 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5240 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5241 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5242 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5243 // CHECK: ret <4 x i32> [[TMP3]]
test_vrsraq_n_u32(uint32x4_t a,uint32x4_t b)5244 uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
5245 return vrsraq_n_u32(a, b, 3);
5246 }
5247
5248 // CHECK-LABEL: @test_vrsraq_n_u64(
5249 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5250 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5251 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5252 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5253 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5254 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5255 // CHECK: ret <2 x i64> [[TMP3]]
test_vrsraq_n_u64(uint64x2_t a,uint64x2_t b)5256 uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
5257 return vrsraq_n_u64(a, b, 3);
5258 }
5259
// vsri_n_*: shift right and insert. These all lower to the target-specific
// @llvm.aarch64.neon.vsri.* intrinsic carrying the immediate shift amount.
// Signed, unsigned and polynomial variants of the same lane type share one
// intrinsic; only the vector element type and the immediate vary.
5260 // CHECK-LABEL: @test_vsri_n_s8(
5261 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5262 // CHECK: ret <8 x i8> [[VSRI_N]]
test_vsri_n_s8(int8x8_t a,int8x8_t b)5263 int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
5264   return vsri_n_s8(a, b, 3);
5265 }
5266 
5267 // CHECK-LABEL: @test_vsri_n_s16(
5268 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5269 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5270 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5271 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5272 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5273 // CHECK: ret <4 x i16> [[VSRI_N2]]
test_vsri_n_s16(int16x4_t a,int16x4_t b)5274 int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
5275   return vsri_n_s16(a, b, 3);
5276 }
5277 
5278 // CHECK-LABEL: @test_vsri_n_s32(
5279 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5280 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5281 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5282 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5283 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5284 // CHECK: ret <2 x i32> [[VSRI_N2]]
test_vsri_n_s32(int32x2_t a,int32x2_t b)5285 int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
5286   return vsri_n_s32(a, b, 3);
5287 }
5288 
5289 // CHECK-LABEL: @test_vsriq_n_s8(
5290 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5291 // CHECK: ret <16 x i8> [[VSRI_N]]
test_vsriq_n_s8(int8x16_t a,int8x16_t b)5292 int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
5293   return vsriq_n_s8(a, b, 3);
5294 }
5295 
5296 // CHECK-LABEL: @test_vsriq_n_s16(
5297 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5298 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5299 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5300 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5301 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5302 // CHECK: ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_s16(int16x8_t a,int16x8_t b)5303 int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
5304   return vsriq_n_s16(a, b, 3);
5305 }
5306 
5307 // CHECK-LABEL: @test_vsriq_n_s32(
5308 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5309 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5310 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5311 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5312 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5313 // CHECK: ret <4 x i32> [[VSRI_N2]]
test_vsriq_n_s32(int32x4_t a,int32x4_t b)5314 int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
5315   return vsriq_n_s32(a, b, 3);
5316 }
5317 
5318 // CHECK-LABEL: @test_vsriq_n_s64(
5319 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5320 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5321 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5322 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5323 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5324 // CHECK: ret <2 x i64> [[VSRI_N2]]
test_vsriq_n_s64(int64x2_t a,int64x2_t b)5325 int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
5326   return vsriq_n_s64(a, b, 3);
5327 }
5328 
5329 // CHECK-LABEL: @test_vsri_n_u8(
5330 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5331 // CHECK: ret <8 x i8> [[VSRI_N]]
test_vsri_n_u8(uint8x8_t a,uint8x8_t b)5332 uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
5333   return vsri_n_u8(a, b, 3);
5334 }
5335 
5336 // CHECK-LABEL: @test_vsri_n_u16(
5337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5339 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5340 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5341 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5342 // CHECK: ret <4 x i16> [[VSRI_N2]]
test_vsri_n_u16(uint16x4_t a,uint16x4_t b)5343 uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
5344   return vsri_n_u16(a, b, 3);
5345 }
5346 
5347 // CHECK-LABEL: @test_vsri_n_u32(
5348 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5350 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5351 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5352 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5353 // CHECK: ret <2 x i32> [[VSRI_N2]]
test_vsri_n_u32(uint32x2_t a,uint32x2_t b)5354 uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
5355   return vsri_n_u32(a, b, 3);
5356 }
5357 
5358 // CHECK-LABEL: @test_vsriq_n_u8(
5359 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5360 // CHECK: ret <16 x i8> [[VSRI_N]]
test_vsriq_n_u8(uint8x16_t a,uint8x16_t b)5361 uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
5362   return vsriq_n_u8(a, b, 3);
5363 }
5364 
5365 // CHECK-LABEL: @test_vsriq_n_u16(
5366 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5367 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5368 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5369 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5370 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5371 // CHECK: ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_u16(uint16x8_t a,uint16x8_t b)5372 uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
5373   return vsriq_n_u16(a, b, 3);
5374 }
5375 
5376 // CHECK-LABEL: @test_vsriq_n_u32(
5377 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5378 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5379 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5380 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5381 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5382 // CHECK: ret <4 x i32> [[VSRI_N2]]
test_vsriq_n_u32(uint32x4_t a,uint32x4_t b)5383 uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
5384   return vsriq_n_u32(a, b, 3);
5385 }
5386 
5387 // CHECK-LABEL: @test_vsriq_n_u64(
5388 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5389 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5390 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5391 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5392 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5393 // CHECK: ret <2 x i64> [[VSRI_N2]]
test_vsriq_n_u64(uint64x2_t a,uint64x2_t b)5394 uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
5395   return vsriq_n_u64(a, b, 3);
5396 }
5397 
// Polynomial variants; the p16 tests use the maximum shift (15) to exercise
// the upper bound of the immediate range.
5398 // CHECK-LABEL: @test_vsri_n_p8(
5399 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5400 // CHECK: ret <8 x i8> [[VSRI_N]]
test_vsri_n_p8(poly8x8_t a,poly8x8_t b)5401 poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
5402   return vsri_n_p8(a, b, 3);
5403 }
5404 
5405 // CHECK-LABEL: @test_vsri_n_p16(
5406 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5407 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5408 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5409 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5410 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
5411 // CHECK: ret <4 x i16> [[VSRI_N2]]
test_vsri_n_p16(poly16x4_t a,poly16x4_t b)5412 poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
5413   return vsri_n_p16(a, b, 15);
5414 }
5415 
5416 // CHECK-LABEL: @test_vsriq_n_p8(
5417 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5418 // CHECK: ret <16 x i8> [[VSRI_N]]
test_vsriq_n_p8(poly8x16_t a,poly8x16_t b)5419 poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
5420   return vsriq_n_p8(a, b, 3);
5421 }
5422 
5423 // CHECK-LABEL: @test_vsriq_n_p16(
5424 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5425 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5426 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5427 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5428 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
5429 // CHECK: ret <8 x i16> [[VSRI_N2]]
test_vsriq_n_p16(poly16x8_t a,poly16x8_t b)5430 poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
5431   return vsriq_n_p16(a, b, 15);
5432 }
5433
// vsli_n_*: shift left and insert. Mirrors the vsri tests above, lowering to
// the @llvm.aarch64.neon.vsli.* intrinsic with the immediate shift amount.
5434 // CHECK-LABEL: @test_vsli_n_s8(
5435 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5436 // CHECK: ret <8 x i8> [[VSLI_N]]
test_vsli_n_s8(int8x8_t a,int8x8_t b)5437 int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
5438   return vsli_n_s8(a, b, 3);
5439 }
5440 
5441 // CHECK-LABEL: @test_vsli_n_s16(
5442 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5444 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5445 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5446 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5447 // CHECK: ret <4 x i16> [[VSLI_N2]]
test_vsli_n_s16(int16x4_t a,int16x4_t b)5448 int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
5449   return vsli_n_s16(a, b, 3);
5450 }
5451 
5452 // CHECK-LABEL: @test_vsli_n_s32(
5453 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5454 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5455 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5456 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5457 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5458 // CHECK: ret <2 x i32> [[VSLI_N2]]
test_vsli_n_s32(int32x2_t a,int32x2_t b)5459 int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
5460   return vsli_n_s32(a, b, 3);
5461 }
5462 
5463 // CHECK-LABEL: @test_vsliq_n_s8(
5464 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5465 // CHECK: ret <16 x i8> [[VSLI_N]]
test_vsliq_n_s8(int8x16_t a,int8x16_t b)5466 int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
5467   return vsliq_n_s8(a, b, 3);
5468 }
5469 
5470 // CHECK-LABEL: @test_vsliq_n_s16(
5471 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5472 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5473 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5474 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5475 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5476 // CHECK: ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_s16(int16x8_t a,int16x8_t b)5477 int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
5478   return vsliq_n_s16(a, b, 3);
5479 }
5480 
5481 // CHECK-LABEL: @test_vsliq_n_s32(
5482 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5483 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5484 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5485 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5486 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5487 // CHECK: ret <4 x i32> [[VSLI_N2]]
test_vsliq_n_s32(int32x4_t a,int32x4_t b)5488 int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
5489   return vsliq_n_s32(a, b, 3);
5490 }
5491 
5492 // CHECK-LABEL: @test_vsliq_n_s64(
5493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5494 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5495 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5496 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5497 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5498 // CHECK: ret <2 x i64> [[VSLI_N2]]
test_vsliq_n_s64(int64x2_t a,int64x2_t b)5499 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
5500   return vsliq_n_s64(a, b, 3);
5501 }
5502 
5503 // CHECK-LABEL: @test_vsli_n_u8(
5504 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5505 // CHECK: ret <8 x i8> [[VSLI_N]]
test_vsli_n_u8(uint8x8_t a,uint8x8_t b)5506 uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
5507   return vsli_n_u8(a, b, 3);
5508 }
5509 
5510 // CHECK-LABEL: @test_vsli_n_u16(
5511 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5512 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5513 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5514 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5515 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5516 // CHECK: ret <4 x i16> [[VSLI_N2]]
test_vsli_n_u16(uint16x4_t a,uint16x4_t b)5517 uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
5518   return vsli_n_u16(a, b, 3);
5519 }
5520 
5521 // CHECK-LABEL: @test_vsli_n_u32(
5522 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5523 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5524 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5525 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5526 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5527 // CHECK: ret <2 x i32> [[VSLI_N2]]
test_vsli_n_u32(uint32x2_t a,uint32x2_t b)5528 uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
5529   return vsli_n_u32(a, b, 3);
5530 }
5531 
5532 // CHECK-LABEL: @test_vsliq_n_u8(
5533 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5534 // CHECK: ret <16 x i8> [[VSLI_N]]
test_vsliq_n_u8(uint8x16_t a,uint8x16_t b)5535 uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
5536   return vsliq_n_u8(a, b, 3);
5537 }
5538 
5539 // CHECK-LABEL: @test_vsliq_n_u16(
5540 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5541 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5542 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5543 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5544 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5545 // CHECK: ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_u16(uint16x8_t a,uint16x8_t b)5546 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
5547   return vsliq_n_u16(a, b, 3);
5548 }
5549 
5550 // CHECK-LABEL: @test_vsliq_n_u32(
5551 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5552 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5553 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5554 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5555 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5556 // CHECK: ret <4 x i32> [[VSLI_N2]]
test_vsliq_n_u32(uint32x4_t a,uint32x4_t b)5557 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
5558   return vsliq_n_u32(a, b, 3);
5559 }
5560 
5561 // CHECK-LABEL: @test_vsliq_n_u64(
5562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5564 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5565 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5566 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5567 // CHECK: ret <2 x i64> [[VSLI_N2]]
test_vsliq_n_u64(uint64x2_t a,uint64x2_t b)5568 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
5569   return vsliq_n_u64(a, b, 3);
5570 }
5571 
// Polynomial variants; p16 uses the maximum immediate (15), matching the
// vsri polynomial tests above.
5572 // CHECK-LABEL: @test_vsli_n_p8(
5573 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5574 // CHECK: ret <8 x i8> [[VSLI_N]]
test_vsli_n_p8(poly8x8_t a,poly8x8_t b)5575 poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
5576   return vsli_n_p8(a, b, 3);
5577 }
5578 
5579 // CHECK-LABEL: @test_vsli_n_p16(
5580 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5581 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5582 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5583 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5584 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
5585 // CHECK: ret <4 x i16> [[VSLI_N2]]
test_vsli_n_p16(poly16x4_t a,poly16x4_t b)5586 poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
5587   return vsli_n_p16(a, b, 15);
5588 }
5589 
5590 // CHECK-LABEL: @test_vsliq_n_p8(
5591 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5592 // CHECK: ret <16 x i8> [[VSLI_N]]
test_vsliq_n_p8(poly8x16_t a,poly8x16_t b)5593 poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
5594   return vsliq_n_p8(a, b, 3);
5595 }
5596 
5597 // CHECK-LABEL: @test_vsliq_n_p16(
5598 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5599 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5600 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5601 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5602 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
5603 // CHECK: ret <8 x i16> [[VSLI_N2]]
test_vsliq_n_p16(poly16x8_t a,poly16x8_t b)5604 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
5605   return vsliq_n_p16(a, b, 15);
5606 }
5607
// vqshlu_n_*: signed saturating shift left, unsigned result. Signed input
// vectors produce unsigned results via the sqshlu intrinsic with the shift
// amount splatted to a constant vector operand.
5608 // CHECK-LABEL: @test_vqshlu_n_s8(
5609 // CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
5610 // CHECK: ret <8 x i8> [[VQSHLU_N]]
test_vqshlu_n_s8(int8x8_t a)5611 uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
5612   return vqshlu_n_s8(a, 3);
5613 }
5614 
5615 // CHECK-LABEL: @test_vqshlu_n_s16(
5616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5617 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5618 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
5619 // CHECK: ret <4 x i16> [[VQSHLU_N1]]
test_vqshlu_n_s16(int16x4_t a)5620 uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
5621   return vqshlu_n_s16(a, 3);
5622 }
5623 
5624 // CHECK-LABEL: @test_vqshlu_n_s32(
5625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5626 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5627 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
5628 // CHECK: ret <2 x i32> [[VQSHLU_N1]]
test_vqshlu_n_s32(int32x2_t a)5629 uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
5630   return vqshlu_n_s32(a, 3);
5631 }
5632 
5633 // CHECK-LABEL: @test_vqshluq_n_s8(
5634 // CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
5635 // CHECK: ret <16 x i8> [[VQSHLU_N]]
test_vqshluq_n_s8(int8x16_t a)5636 uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
5637   return vqshluq_n_s8(a, 3);
5638 }
5639 
5640 // CHECK-LABEL: @test_vqshluq_n_s16(
5641 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5642 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5643 // CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
5644 // CHECK: ret <8 x i16> [[VQSHLU_N1]]
test_vqshluq_n_s16(int16x8_t a)5645 uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
5646   return vqshluq_n_s16(a, 3);
5647 }
5648 
5649 // CHECK-LABEL: @test_vqshluq_n_s32(
5650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5651 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5652 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
5653 // CHECK: ret <4 x i32> [[VQSHLU_N1]]
test_vqshluq_n_s32(int32x4_t a)5654 uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
5655   return vqshluq_n_s32(a, 3);
5656 }
5657 
5658 // CHECK-LABEL: @test_vqshluq_n_s64(
5659 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5660 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5661 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
5662 // CHECK: ret <2 x i64> [[VQSHLU_N1]]
test_vqshluq_n_s64(int64x2_t a)5663 uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
5664   return vqshluq_n_s64(a, 3);
5665 }
5666
// vshrn_n_*: shift right narrow. No target intrinsic here: the IR is a plain
// vector shift (ashr for signed, lshr for unsigned) by a splat constant
// followed by a trunc to the half-width element type.
5667 // CHECK-LABEL: @test_vshrn_n_s16(
5668 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5669 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5670 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5671 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5672 // CHECK: ret <8 x i8> [[VSHRN_N]]
test_vshrn_n_s16(int16x8_t a)5673 int8x8_t test_vshrn_n_s16(int16x8_t a) {
5674   return vshrn_n_s16(a, 3);
5675 }
5676 
5677 // CHECK-LABEL: @test_vshrn_n_s32(
5678 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5679 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5680 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5681 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5682 // CHECK: ret <4 x i16> [[VSHRN_N]]
test_vshrn_n_s32(int32x4_t a)5683 int16x4_t test_vshrn_n_s32(int32x4_t a) {
5684   return vshrn_n_s32(a, 9);
5685 }
5686 
5687 // CHECK-LABEL: @test_vshrn_n_s64(
5688 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5689 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5690 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
5691 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5692 // CHECK: ret <2 x i32> [[VSHRN_N]]
test_vshrn_n_s64(int64x2_t a)5693 int32x2_t test_vshrn_n_s64(int64x2_t a) {
5694   return vshrn_n_s64(a, 19);
5695 }
5696 
5697 // CHECK-LABEL: @test_vshrn_n_u16(
5698 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5699 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5700 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5701 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5702 // CHECK: ret <8 x i8> [[VSHRN_N]]
test_vshrn_n_u16(uint16x8_t a)5703 uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
5704   return vshrn_n_u16(a, 3);
5705 }
5706 
5707 // CHECK-LABEL: @test_vshrn_n_u32(
5708 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5709 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5710 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5711 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5712 // CHECK: ret <4 x i16> [[VSHRN_N]]
test_vshrn_n_u32(uint32x4_t a)5713 uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
5714   return vshrn_n_u32(a, 9);
5715 }
5716 
5717 // CHECK-LABEL: @test_vshrn_n_u64(
5718 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5719 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5720 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
5721 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5722 // CHECK: ret <2 x i32> [[VSHRN_N]]
test_vshrn_n_u64(uint64x2_t a)5723 uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
5724   return vshrn_n_u64(a, 19);
5725 }
5726
// vshrn_high_n_*: like vshrn_n (shift right, narrow), but the narrowed
// result is concatenated into the high half of the first argument via a
// shufflevector, yielding a full-width vector.
5727 // CHECK-LABEL: @test_vshrn_high_n_s16(
5728 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5729 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5730 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5731 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5732 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5733 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vshrn_high_n_s16(int8x8_t a,int16x8_t b)5734 int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
5735   return vshrn_high_n_s16(a, b, 3);
5736 }
5737 
5738 // CHECK-LABEL: @test_vshrn_high_n_s32(
5739 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5740 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5741 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5742 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5743 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5744 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vshrn_high_n_s32(int16x4_t a,int32x4_t b)5745 int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
5746   return vshrn_high_n_s32(a, b, 9);
5747 }
5748 
5749 // CHECK-LABEL: @test_vshrn_high_n_s64(
5750 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5751 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5752 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
5753 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5754 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5755 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vshrn_high_n_s64(int32x2_t a,int64x2_t b)5756 int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
5757   return vshrn_high_n_s64(a, b, 19);
5758 }
5759
5760 // CHECK-LABEL: @test_vshrn_high_n_u16(
5761 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5762 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5763 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5764 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5765 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5766 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vshrn_high_n_u16(uint8x8_t a,uint16x8_t b)5767 uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
5768 return vshrn_high_n_u16(a, b, 3);
5769 }
5770
5771 // CHECK-LABEL: @test_vshrn_high_n_u32(
5772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5773 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5774 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5775 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5776 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5777 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vshrn_high_n_u32(uint16x4_t a,uint32x4_t b)5778 uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
5779 return vshrn_high_n_u32(a, b, 9);
5780 }
5781
5782 // CHECK-LABEL: @test_vshrn_high_n_u64(
5783 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5784 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5785 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
5786 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5787 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5788 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vshrn_high_n_u64(uint32x2_t a,uint64x2_t b)5789 uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
5790 return vshrn_high_n_u64(a, b, 19);
5791 }
5792
// vqshrun_n: signed saturating shift right unsigned narrow. The CHECK lines
// verify lowering to a call of the llvm.aarch64.neon.sqshrun intrinsic with
// the shift amount passed as an i32 immediate.
5793 // CHECK-LABEL: @test_vqshrun_n_s16(
5794 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5795 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5796 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
5797 // CHECK: ret <8 x i8> [[VQSHRUN_N1]]
test_vqshrun_n_s16(int16x8_t a)5798 uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
5799 return vqshrun_n_s16(a, 3);
5800 }
5801
5802 // CHECK-LABEL: @test_vqshrun_n_s32(
5803 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5804 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5805 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
5806 // CHECK: ret <4 x i16> [[VQSHRUN_N1]]
test_vqshrun_n_s32(int32x4_t a)5807 uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
5808 return vqshrun_n_s32(a, 9);
5809 }
5810
5811 // CHECK-LABEL: @test_vqshrun_n_s64(
5812 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5813 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5814 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
5815 // CHECK: ret <2 x i32> [[VQSHRUN_N1]]
test_vqshrun_n_s64(int64x2_t a)5816 uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
5817 return vqshrun_n_s64(a, 19);
5818 }
5819
// vqshrun_high_n: same sqshrun intrinsic lowering as vqshrun_n, followed by
// a shufflevector that concatenates the narrowed result onto %a as the high
// half of the full-width return vector.
5820 // CHECK-LABEL: @test_vqshrun_high_n_s16(
5821 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5822 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5823 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
5824 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5825 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vqshrun_high_n_s16(int8x8_t a,int16x8_t b)5826 int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
5827 return vqshrun_high_n_s16(a, b, 3);
5828 }
5829
5830 // CHECK-LABEL: @test_vqshrun_high_n_s32(
5831 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5832 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5833 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
5834 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5835 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vqshrun_high_n_s32(int16x4_t a,int32x4_t b)5836 int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
5837 return vqshrun_high_n_s32(a, b, 9);
5838 }
5839
5840 // CHECK-LABEL: @test_vqshrun_high_n_s64(
5841 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5842 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5843 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
5844 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5845 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vqshrun_high_n_s64(int32x2_t a,int64x2_t b)5846 int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
5847 return vqshrun_high_n_s64(a, b, 19);
5848 }
5849
// vrshrn_n: rounding narrowing shift right. The CHECK lines verify both
// signed and unsigned variants lower to the same llvm.aarch64.neon.rshrn
// intrinsic (rounding narrow has no signedness distinction).
5850 // CHECK-LABEL: @test_vrshrn_n_s16(
5851 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5852 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5853 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5854 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
test_vrshrn_n_s16(int16x8_t a)5855 int8x8_t test_vrshrn_n_s16(int16x8_t a) {
5856 return vrshrn_n_s16(a, 3);
5857 }
5858
5859 // CHECK-LABEL: @test_vrshrn_n_s32(
5860 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5861 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5862 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5863 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
test_vrshrn_n_s32(int32x4_t a)5864 int16x4_t test_vrshrn_n_s32(int32x4_t a) {
5865 return vrshrn_n_s32(a, 9);
5866 }
5867
5868 // CHECK-LABEL: @test_vrshrn_n_s64(
5869 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5870 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5871 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5872 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
test_vrshrn_n_s64(int64x2_t a)5873 int32x2_t test_vrshrn_n_s64(int64x2_t a) {
5874 return vrshrn_n_s64(a, 19);
5875 }
5876
5877 // CHECK-LABEL: @test_vrshrn_n_u16(
5878 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5879 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5880 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5881 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
test_vrshrn_n_u16(uint16x8_t a)5882 uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
5883 return vrshrn_n_u16(a, 3);
5884 }
5885
5886 // CHECK-LABEL: @test_vrshrn_n_u32(
5887 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5888 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5889 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5890 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
test_vrshrn_n_u32(uint32x4_t a)5891 uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
5892 return vrshrn_n_u32(a, 9);
5893 }
5894
5895 // CHECK-LABEL: @test_vrshrn_n_u64(
5896 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5897 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5898 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5899 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
test_vrshrn_n_u64(uint64x2_t a)5900 uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
5901 return vrshrn_n_u64(a, 19);
5902 }
5903
// vrshrn_high_n: rshrn intrinsic lowering followed by the shufflevector that
// merges the narrowed result into the high half of %a.
5904 // CHECK-LABEL: @test_vrshrn_high_n_s16(
5905 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5906 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5907 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5908 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5909 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrshrn_high_n_s16(int8x8_t a,int16x8_t b)5910 int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
5911 return vrshrn_high_n_s16(a, b, 3);
5912 }
5913
5914 // CHECK-LABEL: @test_vrshrn_high_n_s32(
5915 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5916 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5917 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5918 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5919 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrshrn_high_n_s32(int16x4_t a,int32x4_t b)5920 int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
5921 return vrshrn_high_n_s32(a, b, 9);
5922 }
5923
5924 // CHECK-LABEL: @test_vrshrn_high_n_s64(
5925 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5926 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5927 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5928 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5929 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vrshrn_high_n_s64(int32x2_t a,int64x2_t b)5930 int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
5931 return vrshrn_high_n_s64(a, b, 19);
5932 }
5933
5934 // CHECK-LABEL: @test_vrshrn_high_n_u16(
5935 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5936 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5937 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5938 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5939 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrshrn_high_n_u16(uint8x8_t a,uint16x8_t b)5940 uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
5941 return vrshrn_high_n_u16(a, b, 3);
5942 }
5943
5944 // CHECK-LABEL: @test_vrshrn_high_n_u32(
5945 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5946 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5947 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5948 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5949 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrshrn_high_n_u32(uint16x4_t a,uint32x4_t b)5950 uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
5951 return vrshrn_high_n_u32(a, b, 9);
5952 }
5953
5954 // CHECK-LABEL: @test_vrshrn_high_n_u64(
5955 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5956 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5957 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5958 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5959 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vrshrn_high_n_u64(uint32x2_t a,uint64x2_t b)5960 uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
5961 return vrshrn_high_n_u64(a, b, 19);
5962 }
5963
// vqrshrun_n: signed saturating rounding shift right unsigned narrow,
// checked to lower to the llvm.aarch64.neon.sqrshrun intrinsic.
5964 // CHECK-LABEL: @test_vqrshrun_n_s16(
5965 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5966 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5967 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
5968 // CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
test_vqrshrun_n_s16(int16x8_t a)5969 uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
5970 return vqrshrun_n_s16(a, 3);
5971 }
5972
5973 // CHECK-LABEL: @test_vqrshrun_n_s32(
5974 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5975 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5976 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
5977 // CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
test_vqrshrun_n_s32(int32x4_t a)5978 uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
5979 return vqrshrun_n_s32(a, 9);
5980 }
5981
5982 // CHECK-LABEL: @test_vqrshrun_n_s64(
5983 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5984 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5985 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
5986 // CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
test_vqrshrun_n_s64(int64x2_t a)5987 uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
5988 return vqrshrun_n_s64(a, 19);
5989 }
5990
// vqrshrun_high_n: sqrshrun intrinsic lowering plus the shufflevector that
// places the narrowed result in the high half of %a.
5991 // CHECK-LABEL: @test_vqrshrun_high_n_s16(
5992 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5993 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5994 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
5995 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5996 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vqrshrun_high_n_s16(int8x8_t a,int16x8_t b)5997 int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
5998 return vqrshrun_high_n_s16(a, b, 3);
5999 }
6000
6001 // CHECK-LABEL: @test_vqrshrun_high_n_s32(
6002 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6003 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6004 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
6005 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6006 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vqrshrun_high_n_s32(int16x4_t a,int32x4_t b)6007 int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
6008 return vqrshrun_high_n_s32(a, b, 9);
6009 }
6010
6011 // CHECK-LABEL: @test_vqrshrun_high_n_s64(
6012 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6013 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6014 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
6015 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6016 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vqrshrun_high_n_s64(int32x2_t a,int64x2_t b)6017 int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
6018 return vqrshrun_high_n_s64(a, b, 19);
6019 }
6020
// vqshrn_n: saturating narrowing shift right. Signed variants are checked to
// lower to llvm.aarch64.neon.sqshrn, unsigned variants to
// llvm.aarch64.neon.uqshrn.
6021 // CHECK-LABEL: @test_vqshrn_n_s16(
6022 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6023 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6024 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6025 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
test_vqshrn_n_s16(int16x8_t a)6026 int8x8_t test_vqshrn_n_s16(int16x8_t a) {
6027 return vqshrn_n_s16(a, 3);
6028 }
6029
6030 // CHECK-LABEL: @test_vqshrn_n_s32(
6031 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6032 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6033 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6034 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
test_vqshrn_n_s32(int32x4_t a)6035 int16x4_t test_vqshrn_n_s32(int32x4_t a) {
6036 return vqshrn_n_s32(a, 9);
6037 }
6038
6039 // CHECK-LABEL: @test_vqshrn_n_s64(
6040 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6041 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6042 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6043 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
test_vqshrn_n_s64(int64x2_t a)6044 int32x2_t test_vqshrn_n_s64(int64x2_t a) {
6045 return vqshrn_n_s64(a, 19);
6046 }
6047
6048 // CHECK-LABEL: @test_vqshrn_n_u16(
6049 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6050 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6051 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6052 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
test_vqshrn_n_u16(uint16x8_t a)6053 uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
6054 return vqshrn_n_u16(a, 3);
6055 }
6056
6057 // CHECK-LABEL: @test_vqshrn_n_u32(
6058 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6059 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6060 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6061 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
test_vqshrn_n_u32(uint32x4_t a)6062 uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
6063 return vqshrn_n_u32(a, 9);
6064 }
6065
6066 // CHECK-LABEL: @test_vqshrn_n_u64(
6067 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6068 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6069 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6070 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
test_vqshrn_n_u64(uint64x2_t a)6071 uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
6072 return vqshrn_n_u64(a, 19);
6073 }
6074
// vqshrn_high_n: sqshrn (signed) / uqshrn (unsigned) intrinsic lowering plus
// the shufflevector that merges the narrowed result into the high half of %a.
6075 // CHECK-LABEL: @test_vqshrn_high_n_s16(
6076 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6077 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6078 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6079 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6080 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vqshrn_high_n_s16(int8x8_t a,int16x8_t b)6081 int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
6082 return vqshrn_high_n_s16(a, b, 3);
6083 }
6084
6085 // CHECK-LABEL: @test_vqshrn_high_n_s32(
6086 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6087 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6088 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6089 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6090 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vqshrn_high_n_s32(int16x4_t a,int32x4_t b)6091 int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
6092 return vqshrn_high_n_s32(a, b, 9);
6093 }
6094
6095 // CHECK-LABEL: @test_vqshrn_high_n_s64(
6096 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6097 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6098 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6099 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6100 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vqshrn_high_n_s64(int32x2_t a,int64x2_t b)6101 int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
6102 return vqshrn_high_n_s64(a, b, 19);
6103 }
6104
6105 // CHECK-LABEL: @test_vqshrn_high_n_u16(
6106 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6107 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6108 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6109 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6110 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vqshrn_high_n_u16(uint8x8_t a,uint16x8_t b)6111 uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
6112 return vqshrn_high_n_u16(a, b, 3);
6113 }
6114
6115 // CHECK-LABEL: @test_vqshrn_high_n_u32(
6116 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6117 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6118 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6119 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6120 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vqshrn_high_n_u32(uint16x4_t a,uint32x4_t b)6121 uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
6122 return vqshrn_high_n_u32(a, b, 9);
6123 }
6124
6125 // CHECK-LABEL: @test_vqshrn_high_n_u64(
6126 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6127 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6128 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6129 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6130 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vqshrn_high_n_u64(uint32x2_t a,uint64x2_t b)6131 uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
6132 return vqshrn_high_n_u64(a, b, 19);
6133 }
6134
// vqrshrn_n: saturating rounding narrowing shift right. Signed variants are
// checked to lower to llvm.aarch64.neon.sqrshrn, unsigned variants to
// llvm.aarch64.neon.uqrshrn.
6135 // CHECK-LABEL: @test_vqrshrn_n_s16(
6136 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6137 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6138 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6139 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
test_vqrshrn_n_s16(int16x8_t a)6140 int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
6141 return vqrshrn_n_s16(a, 3);
6142 }
6143
6144 // CHECK-LABEL: @test_vqrshrn_n_s32(
6145 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6146 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6147 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6148 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
test_vqrshrn_n_s32(int32x4_t a)6149 int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
6150 return vqrshrn_n_s32(a, 9);
6151 }
6152
6153 // CHECK-LABEL: @test_vqrshrn_n_s64(
6154 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6155 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6156 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6157 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
test_vqrshrn_n_s64(int64x2_t a)6158 int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
6159 return vqrshrn_n_s64(a, 19);
6160 }
6161
6162 // CHECK-LABEL: @test_vqrshrn_n_u16(
6163 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6164 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6165 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6166 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
test_vqrshrn_n_u16(uint16x8_t a)6167 uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
6168 return vqrshrn_n_u16(a, 3);
6169 }
6170
6171 // CHECK-LABEL: @test_vqrshrn_n_u32(
6172 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6173 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6174 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6175 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
test_vqrshrn_n_u32(uint32x4_t a)6176 uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
6177 return vqrshrn_n_u32(a, 9);
6178 }
6179
6180 // CHECK-LABEL: @test_vqrshrn_n_u64(
6181 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6182 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6183 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6184 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
test_vqrshrn_n_u64(uint64x2_t a)6185 uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
6186 return vqrshrn_n_u64(a, 19);
6187 }
6188
// vqrshrn_high_n: sqrshrn (signed) / uqrshrn (unsigned) intrinsic lowering
// plus the shufflevector that merges the narrowed result into the high half
// of %a.
6189 // CHECK-LABEL: @test_vqrshrn_high_n_s16(
6190 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6191 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6192 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6193 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6194 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vqrshrn_high_n_s16(int8x8_t a,int16x8_t b)6195 int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
6196 return vqrshrn_high_n_s16(a, b, 3);
6197 }
6198
6199 // CHECK-LABEL: @test_vqrshrn_high_n_s32(
6200 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6201 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6202 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6203 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6204 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vqrshrn_high_n_s32(int16x4_t a,int32x4_t b)6205 int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
6206 return vqrshrn_high_n_s32(a, b, 9);
6207 }
6208
6209 // CHECK-LABEL: @test_vqrshrn_high_n_s64(
6210 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6211 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6212 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6213 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6214 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vqrshrn_high_n_s64(int32x2_t a,int64x2_t b)6215 int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
6216 return vqrshrn_high_n_s64(a, b, 19);
6217 }
6218
6219 // CHECK-LABEL: @test_vqrshrn_high_n_u16(
6220 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6221 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6222 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6223 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6224 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vqrshrn_high_n_u16(uint8x8_t a,uint16x8_t b)6225 uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
6226 return vqrshrn_high_n_u16(a, b, 3);
6227 }
6228
6229 // CHECK-LABEL: @test_vqrshrn_high_n_u32(
6230 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6231 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6232 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6233 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6234 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vqrshrn_high_n_u32(uint16x4_t a,uint32x4_t b)6235 uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
6236 return vqrshrn_high_n_u32(a, b, 9);
6237 }
6238
6239 // CHECK-LABEL: @test_vqrshrn_high_n_u64(
6240 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6241 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6242 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6243 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6244 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vqrshrn_high_n_u64(uint32x2_t a,uint64x2_t b)6245 uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
6246 return vqrshrn_high_n_u64(a, b, 19);
6247 }
6248
// vshll_n: widening shift left by immediate. The CHECK lines expect plain IR
// (sext for signed sources, zext for unsigned, followed by shl of the
// widened vector) rather than a target intrinsic call. The 16/32-bit element
// variants additionally round-trip the argument through <8 x i8> bitcasts.
6249 // CHECK-LABEL: @test_vshll_n_s8(
6250 // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
6251 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6252 // CHECK: ret <8 x i16> [[VSHLL_N]]
test_vshll_n_s8(int8x8_t a)6253 int16x8_t test_vshll_n_s8(int8x8_t a) {
6254 return vshll_n_s8(a, 3);
6255 }
6256
6257 // CHECK-LABEL: @test_vshll_n_s16(
6258 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6259 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6260 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6261 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6262 // CHECK: ret <4 x i32> [[VSHLL_N]]
test_vshll_n_s16(int16x4_t a)6263 int32x4_t test_vshll_n_s16(int16x4_t a) {
6264 return vshll_n_s16(a, 9);
6265 }
6266
6267 // CHECK-LABEL: @test_vshll_n_s32(
6268 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6270 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6271 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6272 // CHECK: ret <2 x i64> [[VSHLL_N]]
test_vshll_n_s32(int32x2_t a)6273 int64x2_t test_vshll_n_s32(int32x2_t a) {
6274 return vshll_n_s32(a, 19);
6275 }
6276
6277 // CHECK-LABEL: @test_vshll_n_u8(
6278 // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
6279 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6280 // CHECK: ret <8 x i16> [[VSHLL_N]]
test_vshll_n_u8(uint8x8_t a)6281 uint16x8_t test_vshll_n_u8(uint8x8_t a) {
6282 return vshll_n_u8(a, 3);
6283 }
6284
6285 // CHECK-LABEL: @test_vshll_n_u16(
6286 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6287 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6288 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6289 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6290 // CHECK: ret <4 x i32> [[VSHLL_N]]
test_vshll_n_u16(uint16x4_t a)6291 uint32x4_t test_vshll_n_u16(uint16x4_t a) {
6292 return vshll_n_u16(a, 9);
6293 }
6294
6295 // CHECK-LABEL: @test_vshll_n_u32(
6296 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6297 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6298 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6299 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6300 // CHECK: ret <2 x i64> [[VSHLL_N]]
test_vshll_n_u32(uint32x2_t a)6301 uint64x2_t test_vshll_n_u32(uint32x2_t a) {
6302 return vshll_n_u32(a, 19);
6303 }
6304
6305 // CHECK-LABEL: @test_vshll_high_n_s8(
6306 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6307 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6308 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6309 // CHECK: ret <8 x i16> [[VSHLL_N]]
test_vshll_high_n_s8(int8x16_t a)6310 int16x8_t test_vshll_high_n_s8(int8x16_t a) {
6311 return vshll_high_n_s8(a, 3);
6312 }
6313
6314 // CHECK-LABEL: @test_vshll_high_n_s16(
6315 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6316 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6317 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6318 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6319 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6320 // CHECK: ret <4 x i32> [[VSHLL_N]]
test_vshll_high_n_s16(int16x8_t a)6321 int32x4_t test_vshll_high_n_s16(int16x8_t a) {
6322 return vshll_high_n_s16(a, 9);
6323 }
6324
6325 // CHECK-LABEL: @test_vshll_high_n_s32(
6326 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6327 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6328 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6329 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6330 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6331 // CHECK: ret <2 x i64> [[VSHLL_N]]
test_vshll_high_n_s32(int32x4_t a)6332 int64x2_t test_vshll_high_n_s32(int32x4_t a) {
6333 return vshll_high_n_s32(a, 19);
6334 }
6335
6336 // CHECK-LABEL: @test_vshll_high_n_u8(
6337 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6338 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6339 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6340 // CHECK: ret <8 x i16> [[VSHLL_N]]
test_vshll_high_n_u8(uint8x16_t a)6341 uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
6342 return vshll_high_n_u8(a, 3);
6343 }
6344
6345 // CHECK-LABEL: @test_vshll_high_n_u16(
6346 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6347 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6348 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6349 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6350 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6351 // CHECK: ret <4 x i32> [[VSHLL_N]]
test_vshll_high_n_u16(uint16x8_t a)6352 uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
6353 return vshll_high_n_u16(a, 9);
6354 }
6355
6356 // CHECK-LABEL: @test_vshll_high_n_u32(
6357 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6358 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6359 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6360 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6361 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6362 // CHECK: ret <2 x i64> [[VSHLL_N]]
test_vshll_high_n_u32(uint32x4_t a)6363 uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
6364 return vshll_high_n_u32(a, 19);
6365 }
6366
6367 // CHECK-LABEL: @test_vmovl_s8(
6368 // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6369 // CHECK: ret <8 x i16> [[VMOVL_I]]
test_vmovl_s8(int8x8_t a)6370 int16x8_t test_vmovl_s8(int8x8_t a) {
6371 return vmovl_s8(a);
6372 }
6373
6374 // CHECK-LABEL: @test_vmovl_s16(
6375 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6376 // CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6377 // CHECK: ret <4 x i32> [[VMOVL_I]]
test_vmovl_s16(int16x4_t a)6378 int32x4_t test_vmovl_s16(int16x4_t a) {
6379 return vmovl_s16(a);
6380 }
6381
6382 // CHECK-LABEL: @test_vmovl_s32(
6383 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6384 // CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6385 // CHECK: ret <2 x i64> [[VMOVL_I]]
test_vmovl_s32(int32x2_t a)6386 int64x2_t test_vmovl_s32(int32x2_t a) {
6387 return vmovl_s32(a);
6388 }
6389
6390 // CHECK-LABEL: @test_vmovl_u8(
6391 // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6392 // CHECK: ret <8 x i16> [[VMOVL_I]]
test_vmovl_u8(uint8x8_t a)6393 uint16x8_t test_vmovl_u8(uint8x8_t a) {
6394 return vmovl_u8(a);
6395 }
6396
6397 // CHECK-LABEL: @test_vmovl_u16(
6398 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6399 // CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6400 // CHECK: ret <4 x i32> [[VMOVL_I]]
test_vmovl_u16(uint16x4_t a)6401 uint32x4_t test_vmovl_u16(uint16x4_t a) {
6402 return vmovl_u16(a);
6403 }
6404
6405 // CHECK-LABEL: @test_vmovl_u32(
6406 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6407 // CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6408 // CHECK: ret <2 x i64> [[VMOVL_I]]
test_vmovl_u32(uint32x2_t a)6409 uint64x2_t test_vmovl_u32(uint32x2_t a) {
6410 return vmovl_u32(a);
6411 }
6412
6413 // CHECK-LABEL: @test_vmovl_high_s8(
6414 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6415 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
6416 // CHECK: ret <8 x i16> [[TMP0]]
test_vmovl_high_s8(int8x16_t a)6417 int16x8_t test_vmovl_high_s8(int8x16_t a) {
6418 return vmovl_high_s8(a);
6419 }
6420
6421 // CHECK-LABEL: @test_vmovl_high_s16(
6422 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6423 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
6424 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
6425 // CHECK: ret <4 x i32> [[TMP1]]
test_vmovl_high_s16(int16x8_t a)6426 int32x4_t test_vmovl_high_s16(int16x8_t a) {
6427 return vmovl_high_s16(a);
6428 }
6429
6430 // CHECK-LABEL: @test_vmovl_high_s32(
6431 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6432 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
6433 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
6434 // CHECK: ret <2 x i64> [[TMP1]]
test_vmovl_high_s32(int32x4_t a)6435 int64x2_t test_vmovl_high_s32(int32x4_t a) {
6436 return vmovl_high_s32(a);
6437 }
6438
6439 // CHECK-LABEL: @test_vmovl_high_u8(
6440 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6441 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
6442 // CHECK: ret <8 x i16> [[TMP0]]
test_vmovl_high_u8(uint8x16_t a)6443 uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
6444 return vmovl_high_u8(a);
6445 }
6446
6447 // CHECK-LABEL: @test_vmovl_high_u16(
6448 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6449 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
6450 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
6451 // CHECK: ret <4 x i32> [[TMP1]]
test_vmovl_high_u16(uint16x8_t a)6452 uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
6453 return vmovl_high_u16(a);
6454 }
6455
6456 // CHECK-LABEL: @test_vmovl_high_u32(
6457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6458 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
6459 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
6460 // CHECK: ret <2 x i64> [[TMP1]]
test_vmovl_high_u32(uint32x4_t a)6461 uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
6462 return vmovl_high_u32(a);
6463 }
6464
6465 // CHECK-LABEL: @test_vcvt_n_f32_s32(
6466 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6467 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6468 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6469 // CHECK: ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_s32(int32x2_t a)6470 float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
6471 return vcvt_n_f32_s32(a, 31);
6472 }
6473
6474 // CHECK-LABEL: @test_vcvtq_n_f32_s32(
6475 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6476 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6477 // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6478 // CHECK: ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_s32(int32x4_t a)6479 float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
6480 return vcvtq_n_f32_s32(a, 31);
6481 }
6482
6483 // CHECK-LABEL: @test_vcvtq_n_f64_s64(
6484 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6485 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6486 // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6487 // CHECK: ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_s64(int64x2_t a)6488 float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
6489 return vcvtq_n_f64_s64(a, 50);
6490 }
6491
6492 // CHECK-LABEL: @test_vcvt_n_f32_u32(
6493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6494 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6495 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6496 // CHECK: ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_u32(uint32x2_t a)6497 float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
6498 return vcvt_n_f32_u32(a, 31);
6499 }
6500
6501 // CHECK-LABEL: @test_vcvtq_n_f32_u32(
6502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6503 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6504 // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6505 // CHECK: ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_u32(uint32x4_t a)6506 float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
6507 return vcvtq_n_f32_u32(a, 31);
6508 }
6509
6510 // CHECK-LABEL: @test_vcvtq_n_f64_u64(
6511 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6512 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6513 // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6514 // CHECK: ret <2 x double> [[VCVT_N1]]
test_vcvtq_n_f64_u64(uint64x2_t a)6515 float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
6516 return vcvtq_n_f64_u64(a, 50);
6517 }
6518
6519 // CHECK-LABEL: @test_vcvt_n_s32_f32(
6520 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6521 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
6522 // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
6523 // CHECK: ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_s32_f32(float32x2_t a)6524 int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
6525 return vcvt_n_s32_f32(a, 31);
6526 }
6527
6528 // CHECK-LABEL: @test_vcvtq_n_s32_f32(
6529 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6530 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
6531 // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
6532 // CHECK: ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_s32_f32(float32x4_t a)6533 int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
6534 return vcvtq_n_s32_f32(a, 31);
6535 }
6536
6537 // CHECK-LABEL: @test_vcvtq_n_s64_f64(
6538 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
6539 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
6540 // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
6541 // CHECK: ret <2 x i64> [[VCVT_N1]]
test_vcvtq_n_s64_f64(float64x2_t a)6542 int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
6543 return vcvtq_n_s64_f64(a, 50);
6544 }
6545
6546 // CHECK-LABEL: @test_vcvt_n_u32_f32(
6547 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6548 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
6549 // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
6550 // CHECK: ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_u32_f32(float32x2_t a)6551 uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
6552 return vcvt_n_u32_f32(a, 31);
6553 }
6554
6555 // CHECK-LABEL: @test_vcvtq_n_u32_f32(
6556 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6557 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
6558 // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
6559 // CHECK: ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_u32_f32(float32x4_t a)6560 uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
6561 return vcvtq_n_u32_f32(a, 31);
6562 }
6563
6564 // CHECK-LABEL: @test_vcvtq_n_u64_f64(
6565 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
6566 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
6567 // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
6568 // CHECK: ret <2 x i64> [[VCVT_N1]]
test_vcvtq_n_u64_f64(float64x2_t a)6569 uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
6570 return vcvtq_n_u64_f64(a, 50);
6571 }
6572
6573 // CHECK-LABEL: @test_vaddl_s8(
6574 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6575 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6576 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6577 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddl_s8(int8x8_t a,int8x8_t b)6578 int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
6579 return vaddl_s8(a, b);
6580 }
6581
6582 // CHECK-LABEL: @test_vaddl_s16(
6583 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6584 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6585 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6586 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6587 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6588 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddl_s16(int16x4_t a,int16x4_t b)6589 int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
6590 return vaddl_s16(a, b);
6591 }
6592
6593 // CHECK-LABEL: @test_vaddl_s32(
6594 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6595 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6596 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6597 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6598 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6599 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddl_s32(int32x2_t a,int32x2_t b)6600 int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
6601 return vaddl_s32(a, b);
6602 }
6603
6604 // CHECK-LABEL: @test_vaddl_u8(
6605 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6606 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6607 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6608 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddl_u8(uint8x8_t a,uint8x8_t b)6609 uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
6610 return vaddl_u8(a, b);
6611 }
6612
6613 // CHECK-LABEL: @test_vaddl_u16(
6614 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6615 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6616 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6617 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6618 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6619 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddl_u16(uint16x4_t a,uint16x4_t b)6620 uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
6621 return vaddl_u16(a, b);
6622 }
6623
6624 // CHECK-LABEL: @test_vaddl_u32(
6625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6626 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6627 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6628 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6629 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6630 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddl_u32(uint32x2_t a,uint32x2_t b)6631 uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
6632 return vaddl_u32(a, b);
6633 }
6634
6635 // CHECK-LABEL: @test_vaddl_high_s8(
6636 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6637 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6638 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6639 // CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6640 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
6641 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddl_high_s8(int8x16_t a,int8x16_t b)6642 int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
6643 return vaddl_high_s8(a, b);
6644 }
6645
6646 // CHECK-LABEL: @test_vaddl_high_s16(
6647 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6648 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6649 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6650 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6651 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6652 // CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6653 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
6654 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddl_high_s16(int16x8_t a,int16x8_t b)6655 int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
6656 return vaddl_high_s16(a, b);
6657 }
6658
6659 // CHECK-LABEL: @test_vaddl_high_s32(
6660 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6661 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6662 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6663 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6664 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6665 // CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6666 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
6667 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddl_high_s32(int32x4_t a,int32x4_t b)6668 int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
6669 return vaddl_high_s32(a, b);
6670 }
6671
6672 // CHECK-LABEL: @test_vaddl_high_u8(
6673 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6674 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6675 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6676 // CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6677 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
6678 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddl_high_u8(uint8x16_t a,uint8x16_t b)6679 uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
6680 return vaddl_high_u8(a, b);
6681 }
6682
6683 // CHECK-LABEL: @test_vaddl_high_u16(
6684 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6685 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6686 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6687 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6688 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6689 // CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6690 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
6691 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddl_high_u16(uint16x8_t a,uint16x8_t b)6692 uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
6693 return vaddl_high_u16(a, b);
6694 }
6695
6696 // CHECK-LABEL: @test_vaddl_high_u32(
6697 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6698 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6699 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6700 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6701 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6702 // CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6703 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
6704 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddl_high_u32(uint32x4_t a,uint32x4_t b)6705 uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
6706 return vaddl_high_u32(a, b);
6707 }
6708
6709 // CHECK-LABEL: @test_vaddw_s8(
6710 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6711 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
6712 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddw_s8(int16x8_t a,int8x8_t b)6713 int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
6714 return vaddw_s8(a, b);
6715 }
6716
6717 // CHECK-LABEL: @test_vaddw_s16(
6718 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6719 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6720 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
6721 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddw_s16(int32x4_t a,int16x4_t b)6722 int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
6723 return vaddw_s16(a, b);
6724 }
6725
6726 // CHECK-LABEL: @test_vaddw_s32(
6727 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6728 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6729 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
6730 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddw_s32(int64x2_t a,int32x2_t b)6731 int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
6732 return vaddw_s32(a, b);
6733 }
6734
6735 // CHECK-LABEL: @test_vaddw_u8(
6736 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6737 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
6738 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddw_u8(uint16x8_t a,uint8x8_t b)6739 uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
6740 return vaddw_u8(a, b);
6741 }
6742
6743 // CHECK-LABEL: @test_vaddw_u16(
6744 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6745 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6746 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
6747 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddw_u16(uint32x4_t a,uint16x4_t b)6748 uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
6749 return vaddw_u16(a, b);
6750 }
6751
6752 // CHECK-LABEL: @test_vaddw_u32(
6753 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6754 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6755 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
6756 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddw_u32(uint64x2_t a,uint32x2_t b)6757 uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
6758 return vaddw_u32(a, b);
6759 }
6760
6761 // CHECK-LABEL: @test_vaddw_high_s8(
6762 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6763 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6764 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
6765 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddw_high_s8(int16x8_t a,int8x16_t b)6766 int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
6767 return vaddw_high_s8(a, b);
6768 }
6769
6770 // CHECK-LABEL: @test_vaddw_high_s16(
6771 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6773 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6774 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
6775 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddw_high_s16(int32x4_t a,int16x8_t b)6776 int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
6777 return vaddw_high_s16(a, b);
6778 }
6779
6780 // CHECK-LABEL: @test_vaddw_high_s32(
6781 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6782 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6783 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6784 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
6785 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddw_high_s32(int64x2_t a,int32x4_t b)6786 int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
6787 return vaddw_high_s32(a, b);
6788 }
6789
6790 // CHECK-LABEL: @test_vaddw_high_u8(
6791 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6792 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6793 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
6794 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddw_high_u8(uint16x8_t a,uint8x16_t b)6795 uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
6796 return vaddw_high_u8(a, b);
6797 }
6798
6799 // CHECK-LABEL: @test_vaddw_high_u16(
6800 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6801 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6802 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6803 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
6804 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddw_high_u16(uint32x4_t a,uint16x8_t b)6805 uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
6806 return vaddw_high_u16(a, b);
6807 }
6808
6809 // CHECK-LABEL: @test_vaddw_high_u32(
6810 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6811 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6812 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6813 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
6814 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddw_high_u32(uint64x2_t a,uint32x4_t b)6815 uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
6816 return vaddw_high_u32(a, b);
6817 }
6818
6819 // CHECK-LABEL: @test_vsubl_s8(
6820 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6821 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6822 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6823 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubl_s8(int8x8_t a,int8x8_t b)6824 int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
6825 return vsubl_s8(a, b);
6826 }
6827
6828 // CHECK-LABEL: @test_vsubl_s16(
6829 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6830 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6831 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6832 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6833 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6834 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubl_s16(int16x4_t a,int16x4_t b)6835 int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
6836 return vsubl_s16(a, b);
6837 }
6838
// NOTE(review): FileCheck-driven IR test — the "// CHECK" lines are assertions
// matched against clang's emitted LLVM IR and must not be edited; only plain
// comments like this one are documentation.
// vsubl_*: widening subtract. Both 64-bit vector operands are sign-extended
// (signed variants) or zero-extended (unsigned variants) to the double-width
// element type, then subtracted. The intermediate bitcasts to <8 x i8> come
// from the generic NEON argument-passing shuffle in arm_neon.h.
6839 // CHECK-LABEL: @test_vsubl_s32(
6840 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6841 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6842 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6843 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6844 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6845 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubl_s32(int32x2_t a,int32x2_t b)6846 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
6847   return vsubl_s32(a, b);
6848 }
6849
// Unsigned variants: same shape, but the widening uses zext. The v8i8 input
// case has no pre-extension bitcast because the operands are already <8 x i8>.
6850 // CHECK-LABEL: @test_vsubl_u8(
6851 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6852 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6853 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6854 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubl_u8(uint8x8_t a,uint8x8_t b)6855 uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
6856   return vsubl_u8(a, b);
6857 }
6858
6859 // CHECK-LABEL: @test_vsubl_u16(
6860 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6861 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6862 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6863 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6864 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6865 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubl_u16(uint16x4_t a,uint16x4_t b)6866 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
6867   return vsubl_u16(a, b);
6868 }
6869
6870 // CHECK-LABEL: @test_vsubl_u32(
6871 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6872 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6873 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6874 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6875 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6876 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubl_u32(uint32x2_t a,uint32x2_t b)6877 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
6878   return vsubl_u32(a, b);
6879 }
6880
// vsubl_high_*: widening subtract on the HIGH halves of 128-bit inputs.
// Expected IR: shufflevector extracts the upper lanes of each operand, the
// halves are sign/zero-extended to double-width elements, then subtracted.
// The "// CHECK" lines are FileCheck assertions — do not edit them.
6881 // CHECK-LABEL: @test_vsubl_high_s8(
6882 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6883 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6884 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6885 // CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6886 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
6887 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubl_high_s8(int8x16_t a,int8x16_t b)6888 int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
6889   return vsubl_high_s8(a, b);
6890 }
6891
6892 // CHECK-LABEL: @test_vsubl_high_s16(
6893 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6894 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6895 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6896 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6897 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6898 // CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6899 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
6900 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubl_high_s16(int16x8_t a,int16x8_t b)6901 int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
6902   return vsubl_high_s16(a, b);
6903 }
6904
6905 // CHECK-LABEL: @test_vsubl_high_s32(
6906 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6907 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6908 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6909 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6910 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6911 // CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6912 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
6913 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubl_high_s32(int32x4_t a,int32x4_t b)6914 int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
6915   return vsubl_high_s32(a, b);
6916 }
6917
// Unsigned variants: identical lane extraction, widening via zext.
6918 // CHECK-LABEL: @test_vsubl_high_u8(
6919 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6920 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6921 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6922 // CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6923 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
6924 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubl_high_u8(uint8x16_t a,uint8x16_t b)6925 uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
6926   return vsubl_high_u8(a, b);
6927 }
6928
6929 // CHECK-LABEL: @test_vsubl_high_u16(
6930 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6931 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6932 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6933 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6934 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6935 // CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6936 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
6937 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubl_high_u16(uint16x8_t a,uint16x8_t b)6938 uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
6939   return vsubl_high_u16(a, b);
6940 }
6941
6942 // CHECK-LABEL: @test_vsubl_high_u32(
6943 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6944 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6945 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6946 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6947 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6948 // CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6949 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
6950 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubl_high_u32(uint32x4_t a,uint32x4_t b)6951 uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
6952   return vsubl_high_u32(a, b);
6953 }
6954
// vsubw_* / vsubw_high_*: wide-narrow subtract. Only the SECOND operand is
// widened (sext/zext to match %a's element width); %a participates directly
// in the sub. The _high variants first extract the upper half of the 128-bit
// second operand with a shufflevector. "// CHECK" lines are FileCheck
// assertions — do not edit them.
6955 // CHECK-LABEL: @test_vsubw_s8(
6956 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6957 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
6958 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubw_s8(int16x8_t a,int8x8_t b)6959 int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
6960   return vsubw_s8(a, b);
6961 }
6962
6963 // CHECK-LABEL: @test_vsubw_s16(
6964 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6965 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6966 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
6967 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubw_s16(int32x4_t a,int16x4_t b)6968 int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
6969   return vsubw_s16(a, b);
6970 }
6971
6972 // CHECK-LABEL: @test_vsubw_s32(
6973 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6974 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6975 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
6976 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubw_s32(int64x2_t a,int32x2_t b)6977 int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
6978   return vsubw_s32(a, b);
6979 }
6980
6981 // CHECK-LABEL: @test_vsubw_u8(
6982 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6983 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
6984 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubw_u8(uint16x8_t a,uint8x8_t b)6985 uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
6986   return vsubw_u8(a, b);
6987 }
6988
6989 // CHECK-LABEL: @test_vsubw_u16(
6990 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6991 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6992 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
6993 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubw_u16(uint32x4_t a,uint16x4_t b)6994 uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
6995   return vsubw_u16(a, b);
6996 }
6997
6998 // CHECK-LABEL: @test_vsubw_u32(
6999 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7000 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
7001 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
7002 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubw_u32(uint64x2_t a,uint32x2_t b)7003 uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
7004   return vsubw_u32(a, b);
7005 }
7006
// vsubw_high_*: as above, but the narrow operand's upper half is extracted
// first (shufflevector of %b with itself) before widening.
7007 // CHECK-LABEL: @test_vsubw_high_s8(
7008 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7009 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7010 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7011 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubw_high_s8(int16x8_t a,int8x16_t b)7012 int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
7013   return vsubw_high_s8(a, b);
7014 }
7015
7016 // CHECK-LABEL: @test_vsubw_high_s16(
7017 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7018 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7019 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
7020 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
7021 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubw_high_s16(int32x4_t a,int16x8_t b)7022 int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
7023   return vsubw_high_s16(a, b);
7024 }
7025
7026 // CHECK-LABEL: @test_vsubw_high_s32(
7027 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7028 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7029 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
7030 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
7031 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubw_high_s32(int64x2_t a,int32x4_t b)7032 int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
7033   return vsubw_high_s32(a, b);
7034 }
7035
7036 // CHECK-LABEL: @test_vsubw_high_u8(
7037 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7038 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7039 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7040 // CHECK: ret <8 x i16> [[SUB_I]]
test_vsubw_high_u8(uint16x8_t a,uint8x16_t b)7041 uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
7042   return vsubw_high_u8(a, b);
7043 }
7044
7045 // CHECK-LABEL: @test_vsubw_high_u16(
7046 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7047 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7048 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
7049 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
7050 // CHECK: ret <4 x i32> [[SUB_I]]
test_vsubw_high_u16(uint32x4_t a,uint16x8_t b)7051 uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
7052   return vsubw_high_u16(a, b);
7053 }
7054
7055 // CHECK-LABEL: @test_vsubw_high_u32(
7056 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7057 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7058 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
7059 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
7060 // CHECK: ret <2 x i64> [[SUB_I]]
test_vsubw_high_u32(uint64x2_t a,uint32x4_t b)7061 uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
7062   return vsubw_high_u32(a, b);
7063 }
7064
// vaddhn_* / vaddhn_high_*: add and take the HIGH half of each result element.
// Expected lowering: full-width add, logical shift right by half the element
// width (8/16/32), then trunc to the narrow type. The _high variants then
// concatenate the narrow result onto %r via a shufflevector, producing a
// 128-bit vector. "// CHECK" lines are FileCheck assertions — do not edit.
7065 // CHECK-LABEL: @test_vaddhn_s16(
7066 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7067 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7068 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
7069 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7070 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7071 // CHECK: ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_s16(int16x8_t a,int16x8_t b)7072 int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
7073   return vaddhn_s16(a, b);
7074 }
7075
7076 // CHECK-LABEL: @test_vaddhn_s32(
7077 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7078 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7079 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
7080 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7081 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7082 // CHECK: ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_s32(int32x4_t a,int32x4_t b)7083 int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
7084   return vaddhn_s32(a, b);
7085 }
7086
7087 // CHECK-LABEL: @test_vaddhn_s64(
7088 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7089 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7090 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
7091 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7092 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7093 // CHECK: ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_s64(int64x2_t a,int64x2_t b)7094 int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
7095   return vaddhn_s64(a, b);
7096 }
7097
// Unsigned variants: identical IR — high-half narrowing is sign-agnostic.
7098 // CHECK-LABEL: @test_vaddhn_u16(
7099 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7100 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7101 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
7102 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7103 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7104 // CHECK: ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_u16(uint16x8_t a,uint16x8_t b)7105 uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
7106   return vaddhn_u16(a, b);
7107 }
7108
7109 // CHECK-LABEL: @test_vaddhn_u32(
7110 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7111 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7112 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
7113 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7114 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7115 // CHECK: ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_u32(uint32x4_t a,uint32x4_t b)7116 uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
7117   return vaddhn_u32(a, b);
7118 }
7119
7120 // CHECK-LABEL: @test_vaddhn_u64(
7121 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7122 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7123 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
7124 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7125 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7126 // CHECK: ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_u64(uint64x2_t a,uint64x2_t b)7127 uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
7128   return vaddhn_u64(a, b);
7129 }
7130
// vaddhn_high_*: narrow result is appended to the low half %r, giving a
// full-width vector (add/lshr/trunc followed by a concatenating shuffle).
7131 // CHECK-LABEL: @test_vaddhn_high_s16(
7132 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7133 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7134 // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
7135 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7136 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7137 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7138 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vaddhn_high_s16(int8x8_t r,int16x8_t a,int16x8_t b)7139 int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
7140   return vaddhn_high_s16(r, a, b);
7141 }
7142
7143 // CHECK-LABEL: @test_vaddhn_high_s32(
7144 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7145 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7146 // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
7147 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7148 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7149 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7150 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vaddhn_high_s32(int16x4_t r,int32x4_t a,int32x4_t b)7151 int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
7152   return vaddhn_high_s32(r, a, b);
7153 }
7154
7155 // CHECK-LABEL: @test_vaddhn_high_s64(
7156 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7157 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7158 // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
7159 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7160 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7161 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7162 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vaddhn_high_s64(int32x2_t r,int64x2_t a,int64x2_t b)7163 int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
7164   return vaddhn_high_s64(r, a, b);
7165 }
7166
7167 // CHECK-LABEL: @test_vaddhn_high_u16(
7168 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7169 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7170 // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
7171 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7172 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7173 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7174 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vaddhn_high_u16(uint8x8_t r,uint16x8_t a,uint16x8_t b)7175 uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
7176   return vaddhn_high_u16(r, a, b);
7177 }
7178
7179 // CHECK-LABEL: @test_vaddhn_high_u32(
7180 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7181 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7182 // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
7183 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7184 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7185 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7186 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vaddhn_high_u32(uint16x4_t r,uint32x4_t a,uint32x4_t b)7187 uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
7188   return vaddhn_high_u32(r, a, b);
7189 }
7190
7191 // CHECK-LABEL: @test_vaddhn_high_u64(
7192 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7193 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7194 // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
7195 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7196 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7197 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7198 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vaddhn_high_u64(uint32x2_t r,uint64x2_t a,uint64x2_t b)7199 uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
7200   return vaddhn_high_u64(r, a, b);
7201 }
7202
// vraddhn_* / vraddhn_high_*: ROUNDING add-high-narrow. Unlike vaddhn, the
// rounding cannot be expressed as plain add/lshr/trunc IR, so clang lowers
// these to the target intrinsic llvm.aarch64.neon.raddhn.*. The v4i16/v2i32
// result cases have an extra result bitcast to <8 x i8> ([[V*_V3_I]]); the
// v8i8 case does not, since the result is already <8 x i8>. The _high
// variants append the narrow result to %r with a shufflevector. "// CHECK"
// lines are FileCheck assertions — do not edit them.
7203 // CHECK-LABEL: @test_vraddhn_s16(
7204 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7205 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7206 // CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7207 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
test_vraddhn_s16(int16x8_t a,int16x8_t b)7208 int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
7209   return vraddhn_s16(a, b);
7210 }
7211
7212 // CHECK-LABEL: @test_vraddhn_s32(
7213 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7214 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7215 // CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7216 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7217 // CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
test_vraddhn_s32(int32x4_t a,int32x4_t b)7218 int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
7219   return vraddhn_s32(a, b);
7220 }
7221
7222 // CHECK-LABEL: @test_vraddhn_s64(
7223 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7224 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7225 // CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7226 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
7227 // CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
test_vraddhn_s64(int64x2_t a,int64x2_t b)7228 int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
7229   return vraddhn_s64(a, b);
7230 }
7231
// Unsigned variants: same intrinsic — rounding high-half narrowing is
// sign-agnostic, so signed and unsigned map to the same raddhn call.
7232 // CHECK-LABEL: @test_vraddhn_u16(
7233 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7234 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7235 // CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7236 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
test_vraddhn_u16(uint16x8_t a,uint16x8_t b)7237 uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
7238   return vraddhn_u16(a, b);
7239 }
7240
7241 // CHECK-LABEL: @test_vraddhn_u32(
7242 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7243 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7244 // CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7245 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7246 // CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
test_vraddhn_u32(uint32x4_t a,uint32x4_t b)7247 uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
7248   return vraddhn_u32(a, b);
7249 }
7250
7251 // CHECK-LABEL: @test_vraddhn_u64(
7252 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7253 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7254 // CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7255 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
7256 // CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
test_vraddhn_u64(uint64x2_t a,uint64x2_t b)7257 uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
7258   return vraddhn_u64(a, b);
7259 }
7260
7261 // CHECK-LABEL: @test_vraddhn_high_s16(
7262 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7263 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7264 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7265 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7266 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vraddhn_high_s16(int8x8_t r,int16x8_t a,int16x8_t b)7267 int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
7268   return vraddhn_high_s16(r, a, b);
7269 }
7270
7271 // CHECK-LABEL: @test_vraddhn_high_s32(
7272 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7273 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7274 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7275 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
7276 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7277 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vraddhn_high_s32(int16x4_t r,int32x4_t a,int32x4_t b)7278 int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
7279   return vraddhn_high_s32(r, a, b);
7280 }
7281
7282 // CHECK-LABEL: @test_vraddhn_high_s64(
7283 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7284 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7285 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7286 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
7287 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7288 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vraddhn_high_s64(int32x2_t r,int64x2_t a,int64x2_t b)7289 int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
7290   return vraddhn_high_s64(r, a, b);
7291 }
7292
7293 // CHECK-LABEL: @test_vraddhn_high_u16(
7294 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7295 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7296 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7297 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7298 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vraddhn_high_u16(uint8x8_t r,uint16x8_t a,uint16x8_t b)7299 uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
7300   return vraddhn_high_u16(r, a, b);
7301 }
7302
7303 // CHECK-LABEL: @test_vraddhn_high_u32(
7304 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7305 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7306 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7307 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
7308 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7309 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vraddhn_high_u32(uint16x4_t r,uint32x4_t a,uint32x4_t b)7310 uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
7311   return vraddhn_high_u32(r, a, b);
7312 }
7313
7314 // CHECK-LABEL: @test_vraddhn_high_u64(
7315 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7316 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7317 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7318 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
7319 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7320 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vraddhn_high_u64(uint32x2_t r,uint64x2_t a,uint64x2_t b)7321 uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
7322   return vraddhn_high_u64(r, a, b);
7323 }
7324
// vsubhn_* / vsubhn_high_*: subtract and take the HIGH half of each result
// element — same lowering shape as vaddhn but with `sub`: full-width sub,
// lshr by half the element width, trunc to the narrow type; _high variants
// concatenate the narrow result onto %r. "// CHECK" lines are FileCheck
// assertions — do not edit them.
7325 // CHECK-LABEL: @test_vsubhn_s16(
7326 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7327 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7328 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
7329 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7330 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
7331 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
test_vsubhn_s16(int16x8_t a,int16x8_t b)7332 int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
7333   return vsubhn_s16(a, b);
7334 }
7335
7336 // CHECK-LABEL: @test_vsubhn_s32(
7337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7339 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
7340 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
7341 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
7342 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
test_vsubhn_s32(int32x4_t a,int32x4_t b)7343 int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
7344   return vsubhn_s32(a, b);
7345 }
7346
7347 // CHECK-LABEL: @test_vsubhn_s64(
7348 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7350 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
7351 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
7352 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
7353 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
test_vsubhn_s64(int64x2_t a,int64x2_t b)7354 int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
7355   return vsubhn_s64(a, b);
7356 }
7357
7358 // CHECK-LABEL: @test_vsubhn_u16(
7359 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7360 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7361 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
7362 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7363 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
7364 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
test_vsubhn_u16(uint16x8_t a,uint16x8_t b)7365 uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
7366   return vsubhn_u16(a, b);
7367 }
7368
7369 // CHECK-LABEL: @test_vsubhn_u32(
7370 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7371 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7372 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
7373 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
7374 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
7375 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
test_vsubhn_u32(uint32x4_t a,uint32x4_t b)7376 uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
7377   return vsubhn_u32(a, b);
7378 }
7379
7380 // CHECK-LABEL: @test_vsubhn_u64(
7381 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7382 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7383 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
7384 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
7385 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
7386 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
test_vsubhn_u64(uint64x2_t a,uint64x2_t b)7387 uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
7388   return vsubhn_u64(a, b);
7389 }
7390
7391 // CHECK-LABEL: @test_vsubhn_high_s16(
7392 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7393 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7394 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
7395 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7396 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
7397 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7398 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vsubhn_high_s16(int8x8_t r,int16x8_t a,int16x8_t b)7399 int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
7400   return vsubhn_high_s16(r, a, b);
7401 }
7402
7403 // CHECK-LABEL: @test_vsubhn_high_s32(
7404 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7405 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7406 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
7407 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7408 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
7409 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7410 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vsubhn_high_s32(int16x4_t r,int32x4_t a,int32x4_t b)7411 int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
7412   return vsubhn_high_s32(r, a, b);
7413 }
7414
7415 // CHECK-LABEL: @test_vsubhn_high_s64(
7416 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7417 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7418 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
7419 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
7420 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
7421 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7422 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vsubhn_high_s64(int32x2_t r,int64x2_t a,int64x2_t b)7423 int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
7424   return vsubhn_high_s64(r, a, b);
7425 }
7426
7427 // CHECK-LABEL: @test_vsubhn_high_u16(
7428 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7429 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7430 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
7431 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7432 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
7433 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7434 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
test_vsubhn_high_u16(uint8x8_t r,uint16x8_t a,uint16x8_t b)7435 uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
7436   return vsubhn_high_u16(r, a, b);
7437 }
7438
7439 // CHECK-LABEL: @test_vsubhn_high_u32(
7440 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7441 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7442 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
7443 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7444 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
7445 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7446 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
test_vsubhn_high_u32(uint16x4_t r,uint32x4_t a,uint32x4_t b)7447 uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
7448 return vsubhn_high_u32(r, a, b);
7449 }
7450
7451 // CHECK-LABEL: @test_vsubhn_high_u64(
7452 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7453 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7454 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
7455 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
7456 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
7457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7458 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
test_vsubhn_high_u64(uint32x2_t r,uint64x2_t a,uint64x2_t b)7459 uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
7460 return vsubhn_high_u64(r, a, b);
7461 }
7462
// vrsubhn_*: rounding narrowing subtract; unlike vsubhn these lower to the
// @llvm.aarch64.neon.rsubhn.* intrinsics rather than open-coded sub/lshr.
// (The <8 x i8>-result variants need no trailing bitcast.)
// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
7520
// vrsubhn_high_*: rounding narrowing subtract via @llvm.aarch64.neon.rsubhn.*,
// with the narrowed result concatenated onto the low half %r via shufflevector.
// CHECK-LABEL: @test_vrsubhn_high_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vrsubhn_high_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}
7584
// vabdl_*: widening absolute difference — @llvm.aarch64.neon.{s,u}abd.* at the
// source width, then zext to the doubled element width.
// CHECK-LABEL: @test_vabdl_s8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_u8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
7644
// vabal_*: absolute-difference-and-accumulate — {s,u}abd on %b/%c, zext to the
// wide type, then add into the accumulator %a.
// CHECK-LABEL: @test_vabal_s8(
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_u8(
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
7710
// vabdl_high_*: extract the high halves of both q-register inputs with a
// shufflevector, then the same abs-diff + zext widening as vabdl_*.
// CHECK-LABEL: @test_vabdl_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}
7782
// vabal_high_*: high-half extraction of %b/%c, abs-diff, zext, then add into
// the wide accumulator %a.
// CHECK-LABEL: @test_vabal_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}
7860
// vmull_*: widening multiply, lowered directly to @llvm.aarch64.neon.{s,u}mull.*.
// CHECK-LABEL: @test_vmull_s8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: @test_vmull_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: @test_vmull_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: @test_vmull_u8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: @test_vmull_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: @test_vmull_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}
7910
// vmull_high_*: extract the high halves of both q-register inputs with a
// shufflevector, then the same {s,u}mull widening multiply as vmull_*.
// CHECK-LABEL: @test_vmull_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}

// CHECK-LABEL: @test_vmull_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}

// CHECK-LABEL: @test_vmull_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}

// CHECK-LABEL: @test_vmull_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
7950
7951 // CHECK-LABEL: @test_vmull_high_u16(
7952 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7953 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7954 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7955 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7956 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7957 // CHECK: ret <4 x i32> [[VMULL2_I_I]]
test_vmull_high_u16(uint16x8_t a,uint16x8_t b)7958 uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
7959 return vmull_high_u16(a, b);
7960 }
7961
7962 // CHECK-LABEL: @test_vmull_high_u32(
7963 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7964 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7965 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7966 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7967 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7968 // CHECK: ret <2 x i64> [[VMULL2_I_I]]
test_vmull_high_u32(uint32x4_t a,uint32x4_t b)7969 uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
7970 return vmull_high_u32(a, b);
7971 }
7972
// NOTE(review): Auto-generated FileCheck tests for the widening
// multiply-accumulate intrinsics vmlal_* (a + widened b*c) and their
// vmlal_high_* variants (operate on the upper halves of b/c). The
// "// CHECK" comments are FileCheck assertions against the emitted IR —
// regenerate rather than hand-edit.
7973 // CHECK-LABEL: @test_vmlal_s8(
7974 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
7975 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7976 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vmlal_s8(int16x8_t a,int8x8_t b,int8x8_t c)7977 int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
7978   return vmlal_s8(a, b, c);
7979 }
7980
7981 // CHECK-LABEL: @test_vmlal_s16(
7982 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7983 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7984 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
7985 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7986 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vmlal_s16(int32x4_t a,int16x4_t b,int16x4_t c)7987 int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
7988   return vmlal_s16(a, b, c);
7989 }
7990
7991 // CHECK-LABEL: @test_vmlal_s32(
7992 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7993 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7994 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
7995 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7996 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vmlal_s32(int64x2_t a,int32x2_t b,int32x2_t c)7997 int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
7998   return vmlal_s32(a, b, c);
7999 }
8000
8001 // CHECK-LABEL: @test_vmlal_u8(
8002 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8003 // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8004 // CHECK:   ret <8 x i16> [[ADD_I]]
test_vmlal_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)8005 uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
8006   return vmlal_u8(a, b, c);
8007 }
8008
8009 // CHECK-LABEL: @test_vmlal_u16(
8010 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8011 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8012 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8013 // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8014 // CHECK:   ret <4 x i32> [[ADD_I]]
test_vmlal_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)8015 uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
8016   return vmlal_u16(a, b, c);
8017 }
8018
8019 // CHECK-LABEL: @test_vmlal_u32(
8020 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8021 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8022 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8023 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8024 // CHECK:   ret <2 x i64> [[ADD_I]]
test_vmlal_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)8025 uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
8026   return vmlal_u32(a, b, c);
8027 }
8028
8029 // CHECK-LABEL: @test_vmlal_high_s8(
8030 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8031 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8032 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8033 // CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8034 // CHECK:   ret <8 x i16> [[ADD_I_I]]
test_vmlal_high_s8(int16x8_t a,int8x16_t b,int8x16_t c)8035 int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
8036   return vmlal_high_s8(a, b, c);
8037 }
8038
8039 // CHECK-LABEL: @test_vmlal_high_s16(
8040 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8041 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8042 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8043 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8044 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8045 // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8046 // CHECK:   ret <4 x i32> [[ADD_I_I]]
test_vmlal_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)8047 int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
8048   return vmlal_high_s16(a, b, c);
8049 }
8050
8051 // CHECK-LABEL: @test_vmlal_high_s32(
8052 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8053 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8054 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8055 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8056 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8057 // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8058 // CHECK:   ret <2 x i64> [[ADD_I_I]]
test_vmlal_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)8059 int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
8060   return vmlal_high_s32(a, b, c);
8061 }
8062
8063 // CHECK-LABEL: @test_vmlal_high_u8(
8064 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8065 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8066 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8067 // CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8068 // CHECK:   ret <8 x i16> [[ADD_I_I]]
test_vmlal_high_u8(uint16x8_t a,uint8x16_t b,uint8x16_t c)8069 uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
8070   return vmlal_high_u8(a, b, c);
8071 }
8072
8073 // CHECK-LABEL: @test_vmlal_high_u16(
8074 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8075 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8076 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8077 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8078 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8079 // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8080 // CHECK:   ret <4 x i32> [[ADD_I_I]]
test_vmlal_high_u16(uint32x4_t a,uint16x8_t b,uint16x8_t c)8081 uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
8082   return vmlal_high_u16(a, b, c);
8083 }
8084
8085 // CHECK-LABEL: @test_vmlal_high_u32(
8086 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8087 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8088 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8089 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8090 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8091 // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8092 // CHECK:   ret <2 x i64> [[ADD_I_I]]
test_vmlal_high_u32(uint64x2_t a,uint32x4_t b,uint32x4_t c)8093 uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
8094   return vmlal_high_u32(a, b, c);
8095 }
8096
// NOTE(review): Auto-generated FileCheck tests for the widening
// multiply-subtract intrinsics vmlsl_* (a - widened b*c) and their
// vmlsl_high_* variants (operate on the upper halves of b/c). The
// "// CHECK" comments are FileCheck assertions against the emitted IR —
// regenerate rather than hand-edit.
8097 // CHECK-LABEL: @test_vmlsl_s8(
8098 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
8099 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8100 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vmlsl_s8(int16x8_t a,int8x8_t b,int8x8_t c)8101 int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
8102   return vmlsl_s8(a, b, c);
8103 }
8104
8105 // CHECK-LABEL: @test_vmlsl_s16(
8106 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8107 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8108 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
8109 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8110 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vmlsl_s16(int32x4_t a,int16x4_t b,int16x4_t c)8111 int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
8112   return vmlsl_s16(a, b, c);
8113 }
8114
8115 // CHECK-LABEL: @test_vmlsl_s32(
8116 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8117 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8118 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
8119 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8120 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vmlsl_s32(int64x2_t a,int32x2_t b,int32x2_t c)8121 int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
8122   return vmlsl_s32(a, b, c);
8123 }
8124
8125 // CHECK-LABEL: @test_vmlsl_u8(
8126 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8127 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8128 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vmlsl_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)8129 uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
8130   return vmlsl_u8(a, b, c);
8131 }
8132
8133 // CHECK-LABEL: @test_vmlsl_u16(
8134 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8135 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8136 // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8137 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8138 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vmlsl_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)8139 uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
8140   return vmlsl_u16(a, b, c);
8141 }
8142
8143 // CHECK-LABEL: @test_vmlsl_u32(
8144 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8145 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8146 // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8147 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8148 // CHECK:   ret <2 x i64> [[SUB_I]]
test_vmlsl_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)8149 uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
8150   return vmlsl_u32(a, b, c);
8151 }
8152
8153 // CHECK-LABEL: @test_vmlsl_high_s8(
8154 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8155 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8156 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8157 // CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8158 // CHECK:   ret <8 x i16> [[SUB_I_I]]
test_vmlsl_high_s8(int16x8_t a,int8x16_t b,int8x16_t c)8159 int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
8160   return vmlsl_high_s8(a, b, c);
8161 }
8162
8163 // CHECK-LABEL: @test_vmlsl_high_s16(
8164 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8165 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8166 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8167 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8168 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8169 // CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8170 // CHECK:   ret <4 x i32> [[SUB_I_I]]
test_vmlsl_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)8171 int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
8172   return vmlsl_high_s16(a, b, c);
8173 }
8174
8175 // CHECK-LABEL: @test_vmlsl_high_s32(
8176 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8177 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8178 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8179 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8180 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8181 // CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8182 // CHECK:   ret <2 x i64> [[SUB_I_I]]
test_vmlsl_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)8183 int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
8184   return vmlsl_high_s32(a, b, c);
8185 }
8186
8187 // CHECK-LABEL: @test_vmlsl_high_u8(
8188 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8189 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8190 // CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8191 // CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8192 // CHECK:   ret <8 x i16> [[SUB_I_I]]
test_vmlsl_high_u8(uint16x8_t a,uint8x16_t b,uint8x16_t c)8193 uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
8194   return vmlsl_high_u8(a, b, c);
8195 }
8196
8197 // CHECK-LABEL: @test_vmlsl_high_u16(
8198 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8199 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8200 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8201 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8202 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8203 // CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8204 // CHECK:   ret <4 x i32> [[SUB_I_I]]
test_vmlsl_high_u16(uint32x4_t a,uint16x8_t b,uint16x8_t c)8205 uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
8206   return vmlsl_high_u16(a, b, c);
8207 }
8208
8209 // CHECK-LABEL: @test_vmlsl_high_u32(
8210 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8211 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8212 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8213 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8214 // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8215 // CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8216 // CHECK:   ret <2 x i64> [[SUB_I_I]]
test_vmlsl_high_u32(uint64x2_t a,uint32x4_t b,uint32x4_t c)8217 uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
8218   return vmlsl_high_u32(a, b, c);
8219 }
8220
// NOTE(review): Auto-generated FileCheck tests for the saturating doubling
// widening intrinsics: vqdmull_* (sqdmull), vqdmlal_* (sqdmull + sqadd),
// vqdmlsl_* (sqdmull + sqsub), plus the *_high variants that take the upper
// halves of the narrow operands. The "// CHECK" comments are FileCheck
// assertions against the emitted IR — regenerate rather than hand-edit.
8221 // CHECK-LABEL: @test_vqdmull_s16(
8222 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8223 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8224 // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
8225 // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
8226 // CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
test_vqdmull_s16(int16x4_t a,int16x4_t b)8227 int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
8228   return vqdmull_s16(a, b);
8229 }
8230
8231 // CHECK-LABEL: @test_vqdmull_s32(
8232 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8233 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8234 // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
8235 // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
8236 // CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
test_vqdmull_s32(int32x2_t a,int32x2_t b)8237 int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
8238   return vqdmull_s32(a, b);
8239 }
8240
8241 // CHECK-LABEL: @test_vqdmlal_s16(
8242 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8243 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8244 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8245 // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
8246 // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
8247 // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
test_vqdmlal_s16(int32x4_t a,int16x4_t b,int16x4_t c)8248 int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
8249   return vqdmlal_s16(a, b, c);
8250 }
8251
8252 // CHECK-LABEL: @test_vqdmlal_s32(
8253 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8254 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8255 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8256 // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
8257 // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
8258 // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
test_vqdmlal_s32(int64x2_t a,int32x2_t b,int32x2_t c)8259 int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
8260   return vqdmlal_s32(a, b, c);
8261 }
8262
8263 // CHECK-LABEL: @test_vqdmlsl_s16(
8264 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8265 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8266 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8267 // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
8268 // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
8269 // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
test_vqdmlsl_s16(int32x4_t a,int16x4_t b,int16x4_t c)8270 int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
8271   return vqdmlsl_s16(a, b, c);
8272 }
8273
8274 // CHECK-LABEL: @test_vqdmlsl_s32(
8275 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8276 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8277 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8278 // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
8279 // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
8280 // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
test_vqdmlsl_s32(int64x2_t a,int32x2_t b,int32x2_t c)8281 int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
8282   return vqdmlsl_s32(a, b, c);
8283 }
8284
8285 // CHECK-LABEL: @test_vqdmull_high_s16(
8286 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8287 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8288 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8289 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8290 // CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8291 // CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
8292 // CHECK:   ret <4 x i32> [[VQDMULL_V2_I_I]]
test_vqdmull_high_s16(int16x8_t a,int16x8_t b)8293 int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
8294   return vqdmull_high_s16(a, b);
8295 }
8296
8297 // CHECK-LABEL: @test_vqdmull_high_s32(
8298 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8299 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8300 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8301 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8302 // CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8303 // CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
8304 // CHECK:   ret <2 x i64> [[VQDMULL_V2_I_I]]
test_vqdmull_high_s32(int32x4_t a,int32x4_t b)8305 int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
8306   return vqdmull_high_s32(a, b);
8307 }
8308
8309 // CHECK-LABEL: @test_vqdmlal_high_s16(
8310 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8311 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8312 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8313 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8314 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8315 // CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8316 // CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
8317 // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I_I]]
test_vqdmlal_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)8318 int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
8319   return vqdmlal_high_s16(a, b, c);
8320 }
8321
8322 // CHECK-LABEL: @test_vqdmlal_high_s32(
8323 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8324 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8325 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8326 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8327 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8328 // CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8329 // CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
8330 // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I_I]]
test_vqdmlal_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)8331 int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
8332   return vqdmlal_high_s32(a, b, c);
8333 }
8334
8335 // CHECK-LABEL: @test_vqdmlsl_high_s16(
8336 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8337 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8338 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8339 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8340 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8341 // CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8342 // CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
8343 // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I_I]]
test_vqdmlsl_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)8344 int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
8345   return vqdmlsl_high_s16(a, b, c);
8346 }
8347
8348 // CHECK-LABEL: @test_vqdmlsl_high_s32(
8349 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8350 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8351 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8352 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8353 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8354 // CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8355 // CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
8356 // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I_I]]
test_vqdmlsl_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)8357 int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
8358   return vqdmlsl_high_s32(a, b, c);
8359 }
8360
// NOTE(review): Auto-generated FileCheck tests for the polynomial
// (carry-less) multiply intrinsics vmull_p8 / vmull_high_p8 (pmull).
// The "// CHECK" comments are FileCheck assertions against the emitted
// IR — regenerate rather than hand-edit.
8361 // CHECK-LABEL: @test_vmull_p8(
8362 // CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
8363 // CHECK:   ret <8 x i16> [[VMULL_I]]
test_vmull_p8(poly8x8_t a,poly8x8_t b)8364 poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
8365   return vmull_p8(a, b);
8366 }
8367
8368 // CHECK-LABEL: @test_vmull_high_p8(
8369 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8370 // CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8371 // CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8372 // CHECK:   ret <8 x i16> [[VMULL_I_I]]
test_vmull_high_p8(poly8x16_t a,poly8x16_t b)8373 poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
8374   return vmull_high_p8(a, b);
8375 }
8376
// NOTE(review): Auto-generated FileCheck tests for the scalar (d/s/h/b
// register) arithmetic intrinsics: vaddd/vsubd lower to plain i64 add/sub,
// while the saturating vqadd* variants lower either to the scalar i32/i64
// sqadd/uqadd intrinsics or, for 8/16-bit lanes, to a vector intrinsic on a
// single-lane insert/extract sequence. The "// CHECK" comments are FileCheck
// assertions against the emitted IR — regenerate rather than hand-edit.
8377 // CHECK-LABEL: @test_vaddd_s64(
8378 // CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
8379 // CHECK:   ret i64 [[VADDD_I]]
test_vaddd_s64(int64_t a,int64_t b)8380 int64_t test_vaddd_s64(int64_t a, int64_t b) {
8381   return vaddd_s64(a, b);
8382 }
8383
8384 // CHECK-LABEL: @test_vaddd_u64(
8385 // CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
8386 // CHECK:   ret i64 [[VADDD_I]]
test_vaddd_u64(uint64_t a,uint64_t b)8387 uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
8388   return vaddd_u64(a, b);
8389 }
8390
8391 // CHECK-LABEL: @test_vsubd_s64(
8392 // CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
8393 // CHECK:   ret i64 [[VSUBD_I]]
test_vsubd_s64(int64_t a,int64_t b)8394 int64_t test_vsubd_s64(int64_t a, int64_t b) {
8395   return vsubd_s64(a, b);
8396 }
8397
8398 // CHECK-LABEL: @test_vsubd_u64(
8399 // CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
8400 // CHECK:   ret i64 [[VSUBD_I]]
test_vsubd_u64(uint64_t a,uint64_t b)8401 uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
8402   return vsubd_u64(a, b);
8403 }
8404
8405 // CHECK-LABEL: @test_vqaddb_s8(
8406 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8407 // CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8408 // CHECK:   [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8409 // CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
8410 // CHECK:   ret i8 [[TMP2]]
test_vqaddb_s8(int8_t a,int8_t b)8411 int8_t test_vqaddb_s8(int8_t a, int8_t b) {
8412   return vqaddb_s8(a, b);
8413 }
8414
8415 // CHECK-LABEL: @test_vqaddh_s16(
8416 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8417 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8418 // CHECK:   [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8419 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
8420 // CHECK:   ret i16 [[TMP2]]
test_vqaddh_s16(int16_t a,int16_t b)8421 int16_t test_vqaddh_s16(int16_t a, int16_t b) {
8422   return vqaddh_s16(a, b);
8423 }
8424
8425 // CHECK-LABEL: @test_vqadds_s32(
8426 // CHECK:   [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
8427 // CHECK:   ret i32 [[VQADDS_S32_I]]
test_vqadds_s32(int32_t a,int32_t b)8428 int32_t test_vqadds_s32(int32_t a, int32_t b) {
8429   return vqadds_s32(a, b);
8430 }
8431
8432 // CHECK-LABEL: @test_vqaddd_s64(
8433 // CHECK:   [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
8434 // CHECK:   ret i64 [[VQADDD_S64_I]]
test_vqaddd_s64(int64_t a,int64_t b)8435 int64_t test_vqaddd_s64(int64_t a, int64_t b) {
8436   return vqaddd_s64(a, b);
8437 }
8438
8439 // CHECK-LABEL: @test_vqaddb_u8(
8440 // CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8441 // CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8442 // CHECK:   [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8443 // CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
8444 // CHECK:   ret i8 [[TMP2]]
test_vqaddb_u8(uint8_t a,uint8_t b)8445 uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
8446   return vqaddb_u8(a, b);
8447 }
8448
8449 // CHECK-LABEL: @test_vqaddh_u16(
8450 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8451 // CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8452 // CHECK:   [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8453 // CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
8454 // CHECK:   ret i16 [[TMP2]]
test_vqaddh_u16(uint16_t a,uint16_t b)8455 uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
8456   return vqaddh_u16(a, b);
8457 }
8458
8459 // CHECK-LABEL: @test_vqadds_u32(
8460 // CHECK:   [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
8461 // CHECK:   ret i32 [[VQADDS_U32_I]]
test_vqadds_u32(uint32_t a,uint32_t b)8462 uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
8463   return vqadds_u32(a, b);
8464 }
8465
8466 // CHECK-LABEL: @test_vqaddd_u64(
8467 // CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
8468 // CHECK: ret i64 [[VQADDD_U64_I]]
test_vqaddd_u64(uint64_t a,uint64_t b)8469 uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
8470 return vqaddd_u64(a, b);
8471 }
8472
8473 // CHECK-LABEL: @test_vqsubb_s8(
8474 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8475 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8476 // CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8477 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
8478 // CHECK: ret i8 [[TMP2]]
test_vqsubb_s8(int8_t a,int8_t b)8479 int8_t test_vqsubb_s8(int8_t a, int8_t b) {
8480 return vqsubb_s8(a, b);
8481 }
8482
8483 // CHECK-LABEL: @test_vqsubh_s16(
8484 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8485 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8486 // CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8487 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
8488 // CHECK: ret i16 [[TMP2]]
test_vqsubh_s16(int16_t a,int16_t b)8489 int16_t test_vqsubh_s16(int16_t a, int16_t b) {
8490 return vqsubh_s16(a, b);
8491 }
8492
8493 // CHECK-LABEL: @test_vqsubs_s32(
8494 // CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
8495 // CHECK: ret i32 [[VQSUBS_S32_I]]
test_vqsubs_s32(int32_t a,int32_t b)8496 int32_t test_vqsubs_s32(int32_t a, int32_t b) {
8497 return vqsubs_s32(a, b);
8498 }
8499
8500 // CHECK-LABEL: @test_vqsubd_s64(
8501 // CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
8502 // CHECK: ret i64 [[VQSUBD_S64_I]]
test_vqsubd_s64(int64_t a,int64_t b)8503 int64_t test_vqsubd_s64(int64_t a, int64_t b) {
8504 return vqsubd_s64(a, b);
8505 }
8506
8507 // CHECK-LABEL: @test_vqsubb_u8(
8508 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8509 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8510 // CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8511 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
8512 // CHECK: ret i8 [[TMP2]]
test_vqsubb_u8(uint8_t a,uint8_t b)8513 uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
8514 return vqsubb_u8(a, b);
8515 }
8516
8517 // CHECK-LABEL: @test_vqsubh_u16(
8518 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8519 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8520 // CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8521 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
8522 // CHECK: ret i16 [[TMP2]]
test_vqsubh_u16(uint16_t a,uint16_t b)8523 uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
8524 return vqsubh_u16(a, b);
8525 }
8526
8527 // CHECK-LABEL: @test_vqsubs_u32(
8528 // CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
8529 // CHECK: ret i32 [[VQSUBS_U32_I]]
test_vqsubs_u32(uint32_t a,uint32_t b)8530 uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
8531 return vqsubs_u32(a, b);
8532 }
8533
8534 // CHECK-LABEL: @test_vqsubd_u64(
8535 // CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
8536 // CHECK: ret i64 [[VQSUBD_U64_I]]
test_vqsubd_u64(uint64_t a,uint64_t b)8537 uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
8538 return vqsubd_u64(a, b);
8539 }
8540
8541 // CHECK-LABEL: @test_vshld_s64(
8542 // CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
8543 // CHECK: ret i64 [[VSHLD_S64_I]]
test_vshld_s64(int64_t a,int64_t b)8544 int64_t test_vshld_s64(int64_t a, int64_t b) {
8545 return vshld_s64(a, b);
8546 }
8547
8548 // CHECK-LABEL: @test_vshld_u64(
8549 // CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
8550 // CHECK: ret i64 [[VSHLD_U64_I]]
test_vshld_u64(uint64_t a,uint64_t b)8551 uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
8552 return vshld_u64(a, b);
8553 }
8554
8555 // CHECK-LABEL: @test_vqshlb_s8(
8556 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8557 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8558 // CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8559 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
8560 // CHECK: ret i8 [[TMP2]]
test_vqshlb_s8(int8_t a,int8_t b)8561 int8_t test_vqshlb_s8(int8_t a, int8_t b) {
8562 return vqshlb_s8(a, b);
8563 }
8564
8565 // CHECK-LABEL: @test_vqshlh_s16(
8566 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8567 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8568 // CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8569 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
8570 // CHECK: ret i16 [[TMP2]]
test_vqshlh_s16(int16_t a,int16_t b)8571 int16_t test_vqshlh_s16(int16_t a, int16_t b) {
8572 return vqshlh_s16(a, b);
8573 }
8574
8575 // CHECK-LABEL: @test_vqshls_s32(
8576 // CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
8577 // CHECK: ret i32 [[VQSHLS_S32_I]]
test_vqshls_s32(int32_t a,int32_t b)8578 int32_t test_vqshls_s32(int32_t a, int32_t b) {
8579 return vqshls_s32(a, b);
8580 }
8581
8582 // CHECK-LABEL: @test_vqshld_s64(
8583 // CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
8584 // CHECK: ret i64 [[VQSHLD_S64_I]]
test_vqshld_s64(int64_t a,int64_t b)8585 int64_t test_vqshld_s64(int64_t a, int64_t b) {
8586 return vqshld_s64(a, b);
8587 }
8588
8589 // CHECK-LABEL: @test_vqshlb_u8(
8590 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8591 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8592 // CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8593 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
8594 // CHECK: ret i8 [[TMP2]]
test_vqshlb_u8(uint8_t a,uint8_t b)8595 uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
8596 return vqshlb_u8(a, b);
8597 }
8598
8599 // CHECK-LABEL: @test_vqshlh_u16(
8600 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8601 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8602 // CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8603 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
8604 // CHECK: ret i16 [[TMP2]]
test_vqshlh_u16(uint16_t a,uint16_t b)8605 uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
8606 return vqshlh_u16(a, b);
8607 }
8608
8609 // CHECK-LABEL: @test_vqshls_u32(
8610 // CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
8611 // CHECK: ret i32 [[VQSHLS_U32_I]]
test_vqshls_u32(uint32_t a,uint32_t b)8612 uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
8613 return vqshls_u32(a, b);
8614 }
8615
8616 // CHECK-LABEL: @test_vqshld_u64(
8617 // CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
8618 // CHECK: ret i64 [[VQSHLD_U64_I]]
test_vqshld_u64(uint64_t a,uint64_t b)8619 uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
8620 return vqshld_u64(a, b);
8621 }
8622
8623 // CHECK-LABEL: @test_vrshld_s64(
8624 // CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
8625 // CHECK: ret i64 [[VRSHLD_S64_I]]
test_vrshld_s64(int64_t a,int64_t b)8626 int64_t test_vrshld_s64(int64_t a, int64_t b) {
8627 return vrshld_s64(a, b);
8628 }
8629
8630 // CHECK-LABEL: @test_vrshld_u64(
8631 // CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
8632 // CHECK: ret i64 [[VRSHLD_U64_I]]
test_vrshld_u64(uint64_t a,uint64_t b)8633 uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
8634 return vrshld_u64(a, b);
8635 }
8636
8637 // CHECK-LABEL: @test_vqrshlb_s8(
8638 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8639 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8640 // CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8641 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
8642 // CHECK: ret i8 [[TMP2]]
test_vqrshlb_s8(int8_t a,int8_t b)8643 int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
8644 return vqrshlb_s8(a, b);
8645 }
8646
8647 // CHECK-LABEL: @test_vqrshlh_s16(
8648 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8649 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8650 // CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8651 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
8652 // CHECK: ret i16 [[TMP2]]
test_vqrshlh_s16(int16_t a,int16_t b)8653 int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
8654 return vqrshlh_s16(a, b);
8655 }
8656
8657 // CHECK-LABEL: @test_vqrshls_s32(
8658 // CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
8659 // CHECK: ret i32 [[VQRSHLS_S32_I]]
test_vqrshls_s32(int32_t a,int32_t b)8660 int32_t test_vqrshls_s32(int32_t a, int32_t b) {
8661 return vqrshls_s32(a, b);
8662 }
8663
8664 // CHECK-LABEL: @test_vqrshld_s64(
8665 // CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
8666 // CHECK: ret i64 [[VQRSHLD_S64_I]]
test_vqrshld_s64(int64_t a,int64_t b)8667 int64_t test_vqrshld_s64(int64_t a, int64_t b) {
8668 return vqrshld_s64(a, b);
8669 }
8670
8671 // CHECK-LABEL: @test_vqrshlb_u8(
8672 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
8673 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
8674 // CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8675 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
8676 // CHECK: ret i8 [[TMP2]]
test_vqrshlb_u8(uint8_t a,uint8_t b)8677 uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
8678 return vqrshlb_u8(a, b);
8679 }
8680
8681 // CHECK-LABEL: @test_vqrshlh_u16(
8682 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8683 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8684 // CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8685 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
8686 // CHECK: ret i16 [[TMP2]]
test_vqrshlh_u16(uint16_t a,uint16_t b)8687 uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
8688 return vqrshlh_u16(a, b);
8689 }
8690
8691 // CHECK-LABEL: @test_vqrshls_u32(
8692 // CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
8693 // CHECK: ret i32 [[VQRSHLS_U32_I]]
test_vqrshls_u32(uint32_t a,uint32_t b)8694 uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
8695 return vqrshls_u32(a, b);
8696 }
8697
8698 // CHECK-LABEL: @test_vqrshld_u64(
8699 // CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
8700 // CHECK: ret i64 [[VQRSHLD_U64_I]]
test_vqrshld_u64(uint64_t a,uint64_t b)8701 uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
8702 return vqrshld_u64(a, b);
8703 }
8704
8705 // CHECK-LABEL: @test_vpaddd_s64(
8706 // CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
8707 // CHECK: ret i64 [[VPADDD_S64_I]]
test_vpaddd_s64(int64x2_t a)8708 int64_t test_vpaddd_s64(int64x2_t a) {
8709 return vpaddd_s64(a);
8710 }
8711
8712 // CHECK-LABEL: @test_vpadds_f32(
8713 // CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
8714 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
8715 // CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
8716 // CHECK: ret float [[VPADDD_I]]
test_vpadds_f32(float32x2_t a)8717 float32_t test_vpadds_f32(float32x2_t a) {
8718 return vpadds_f32(a);
8719 }
8720
8721 // CHECK-LABEL: @test_vpaddd_f64(
8722 // CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
8723 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
8724 // CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
8725 // CHECK: ret double [[VPADDD_I]]
test_vpaddd_f64(float64x2_t a)8726 float64_t test_vpaddd_f64(float64x2_t a) {
8727 return vpaddd_f64(a);
8728 }
8729
8730 // CHECK-LABEL: @test_vpmaxnms_f32(
8731 // CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
8732 // CHECK: ret float [[VPMAXNMS_F32_I]]
test_vpmaxnms_f32(float32x2_t a)8733 float32_t test_vpmaxnms_f32(float32x2_t a) {
8734 return vpmaxnms_f32(a);
8735 }
8736
8737 // CHECK-LABEL: @test_vpmaxnmqd_f64(
8738 // CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
8739 // CHECK: ret double [[VPMAXNMQD_F64_I]]
test_vpmaxnmqd_f64(float64x2_t a)8740 float64_t test_vpmaxnmqd_f64(float64x2_t a) {
8741 return vpmaxnmqd_f64(a);
8742 }
8743
8744 // CHECK-LABEL: @test_vpmaxs_f32(
8745 // CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
8746 // CHECK: ret float [[VPMAXS_F32_I]]
test_vpmaxs_f32(float32x2_t a)8747 float32_t test_vpmaxs_f32(float32x2_t a) {
8748 return vpmaxs_f32(a);
8749 }
8750
8751 // CHECK-LABEL: @test_vpmaxqd_f64(
8752 // CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
8753 // CHECK: ret double [[VPMAXQD_F64_I]]
test_vpmaxqd_f64(float64x2_t a)8754 float64_t test_vpmaxqd_f64(float64x2_t a) {
8755 return vpmaxqd_f64(a);
8756 }
8757
8758 // CHECK-LABEL: @test_vpminnms_f32(
8759 // CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
8760 // CHECK: ret float [[VPMINNMS_F32_I]]
test_vpminnms_f32(float32x2_t a)8761 float32_t test_vpminnms_f32(float32x2_t a) {
8762 return vpminnms_f32(a);
8763 }
8764
8765 // CHECK-LABEL: @test_vpminnmqd_f64(
8766 // CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
8767 // CHECK: ret double [[VPMINNMQD_F64_I]]
test_vpminnmqd_f64(float64x2_t a)8768 float64_t test_vpminnmqd_f64(float64x2_t a) {
8769 return vpminnmqd_f64(a);
8770 }
8771
8772 // CHECK-LABEL: @test_vpmins_f32(
8773 // CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
8774 // CHECK: ret float [[VPMINS_F32_I]]
test_vpmins_f32(float32x2_t a)8775 float32_t test_vpmins_f32(float32x2_t a) {
8776 return vpmins_f32(a);
8777 }
8778
8779 // CHECK-LABEL: @test_vpminqd_f64(
8780 // CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
8781 // CHECK: ret double [[VPMINQD_F64_I]]
test_vpminqd_f64(float64x2_t a)8782 float64_t test_vpminqd_f64(float64x2_t a) {
8783 return vpminqd_f64(a);
8784 }
8785
8786 // CHECK-LABEL: @test_vqdmulhh_s16(
8787 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8788 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8789 // CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8790 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
8791 // CHECK: ret i16 [[TMP2]]
test_vqdmulhh_s16(int16_t a,int16_t b)8792 int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
8793 return vqdmulhh_s16(a, b);
8794 }
8795
8796 // CHECK-LABEL: @test_vqdmulhs_s32(
8797 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
8798 // CHECK: ret i32 [[VQDMULHS_S32_I]]
test_vqdmulhs_s32(int32_t a,int32_t b)8799 int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
8800 return vqdmulhs_s32(a, b);
8801 }
8802
8803 // CHECK-LABEL: @test_vqrdmulhh_s16(
8804 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
8805 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
8806 // CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8807 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
8808 // CHECK: ret i16 [[TMP2]]
test_vqrdmulhh_s16(int16_t a,int16_t b)8809 int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
8810 return vqrdmulhh_s16(a, b);
8811 }
8812
8813 // CHECK-LABEL: @test_vqrdmulhs_s32(
8814 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
8815 // CHECK: ret i32 [[VQRDMULHS_S32_I]]
test_vqrdmulhs_s32(int32_t a,int32_t b)8816 int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
8817 return vqrdmulhs_s32(a, b);
8818 }
8819
8820 // CHECK-LABEL: @test_vmulxs_f32(
8821 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
8822 // CHECK: ret float [[VMULXS_F32_I]]
test_vmulxs_f32(float32_t a,float32_t b)8823 float32_t test_vmulxs_f32(float32_t a, float32_t b) {
8824 return vmulxs_f32(a, b);
8825 }
8826
8827 // CHECK-LABEL: @test_vmulxd_f64(
8828 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
8829 // CHECK: ret double [[VMULXD_F64_I]]
test_vmulxd_f64(float64_t a,float64_t b)8830 float64_t test_vmulxd_f64(float64_t a, float64_t b) {
8831 return vmulxd_f64(a, b);
8832 }
8833
8834 // CHECK-LABEL: @test_vmulx_f64(
8835 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
8836 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
8837 // CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
8838 // CHECK: ret <1 x double> [[VMULX2_I]]
test_vmulx_f64(float64x1_t a,float64x1_t b)8839 float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
8840 return vmulx_f64(a, b);
8841 }
8842
8843 // CHECK-LABEL: @test_vrecpss_f32(
8844 // CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
8845 // CHECK: ret float [[VRECPS_I]]
test_vrecpss_f32(float32_t a,float32_t b)8846 float32_t test_vrecpss_f32(float32_t a, float32_t b) {
8847 return vrecpss_f32(a, b);
8848 }
8849
8850 // CHECK-LABEL: @test_vrecpsd_f64(
8851 // CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
8852 // CHECK: ret double [[VRECPS_I]]
test_vrecpsd_f64(float64_t a,float64_t b)8853 float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
8854 return vrecpsd_f64(a, b);
8855 }
8856
8857 // CHECK-LABEL: @test_vrsqrtss_f32(
8858 // CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
8859 // CHECK: ret float [[VRSQRTSS_F32_I]]
test_vrsqrtss_f32(float32_t a,float32_t b)8860 float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
8861 return vrsqrtss_f32(a, b);
8862 }
8863
8864 // CHECK-LABEL: @test_vrsqrtsd_f64(
8865 // CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
8866 // CHECK: ret double [[VRSQRTSD_F64_I]]
test_vrsqrtsd_f64(float64_t a,float64_t b)8867 float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
8868 return vrsqrtsd_f64(a, b);
8869 }
8870
8871 // CHECK-LABEL: @test_vcvts_f32_s32(
8872 // CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
8873 // CHECK: ret float [[TMP0]]
test_vcvts_f32_s32(int32_t a)8874 float32_t test_vcvts_f32_s32(int32_t a) {
8875 return vcvts_f32_s32(a);
8876 }
8877
8878 // CHECK-LABEL: @test_vcvtd_f64_s64(
8879 // CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
8880 // CHECK: ret double [[TMP0]]
test_vcvtd_f64_s64(int64_t a)8881 float64_t test_vcvtd_f64_s64(int64_t a) {
8882 return vcvtd_f64_s64(a);
8883 }
8884
8885 // CHECK-LABEL: @test_vcvts_f32_u32(
8886 // CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
8887 // CHECK: ret float [[TMP0]]
test_vcvts_f32_u32(uint32_t a)8888 float32_t test_vcvts_f32_u32(uint32_t a) {
8889 return vcvts_f32_u32(a);
8890 }
8891
8892 // CHECK-LABEL: @test_vcvtd_f64_u64(
8893 // CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
8894 // CHECK: ret double [[TMP0]]
test_vcvtd_f64_u64(uint64_t a)8895 float64_t test_vcvtd_f64_u64(uint64_t a) {
8896 return vcvtd_f64_u64(a);
8897 }
8898
8899 // CHECK-LABEL: @test_vrecpes_f32(
8900 // CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
8901 // CHECK: ret float [[VRECPES_F32_I]]
test_vrecpes_f32(float32_t a)8902 float32_t test_vrecpes_f32(float32_t a) {
8903 return vrecpes_f32(a);
8904 }
8905
8906 // CHECK-LABEL: @test_vrecped_f64(
8907 // CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
8908 // CHECK: ret double [[VRECPED_F64_I]]
test_vrecped_f64(float64_t a)8909 float64_t test_vrecped_f64(float64_t a) {
8910 return vrecped_f64(a);
8911 }
8912
8913 // CHECK-LABEL: @test_vrecpxs_f32(
8914 // CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
8915 // CHECK: ret float [[VRECPXS_F32_I]]
test_vrecpxs_f32(float32_t a)8916 float32_t test_vrecpxs_f32(float32_t a) {
8917 return vrecpxs_f32(a);
8918 }
8919
8920 // CHECK-LABEL: @test_vrecpxd_f64(
8921 // CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
8922 // CHECK: ret double [[VRECPXD_F64_I]]
test_vrecpxd_f64(float64_t a)8923 float64_t test_vrecpxd_f64(float64_t a) {
8924 return vrecpxd_f64(a);
8925 }
8926
8927 // CHECK-LABEL: @test_vrsqrte_u32(
8928 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8929 // CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
8930 // CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
test_vrsqrte_u32(uint32x2_t a)8931 uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
8932 return vrsqrte_u32(a);
8933 }
8934
8935 // CHECK-LABEL: @test_vrsqrteq_u32(
8936 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8937 // CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
8938 // CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
test_vrsqrteq_u32(uint32x4_t a)8939 uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
8940 return vrsqrteq_u32(a);
8941 }
8942
8943 // CHECK-LABEL: @test_vrsqrtes_f32(
8944 // CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
8945 // CHECK: ret float [[VRSQRTES_F32_I]]
test_vrsqrtes_f32(float32_t a)8946 float32_t test_vrsqrtes_f32(float32_t a) {
8947 return vrsqrtes_f32(a);
8948 }
8949
8950 // CHECK-LABEL: @test_vrsqrted_f64(
8951 // CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
8952 // CHECK: ret double [[VRSQRTED_F64_I]]
test_vrsqrted_f64(float64_t a)8953 float64_t test_vrsqrted_f64(float64_t a) {
8954 return vrsqrted_f64(a);
8955 }
8956
8957 // CHECK-LABEL: @test_vld1q_u8(
8958 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
8959 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
8960 // CHECK: ret <16 x i8> [[TMP1]]
test_vld1q_u8(uint8_t const * a)8961 uint8x16_t test_vld1q_u8(uint8_t const *a) {
8962 return vld1q_u8(a);
8963 }
8964
8965 // CHECK-LABEL: @test_vld1q_u16(
8966 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
8967 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
8968 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
8969 // CHECK: ret <8 x i16> [[TMP2]]
test_vld1q_u16(uint16_t const * a)8970 uint16x8_t test_vld1q_u16(uint16_t const *a) {
8971 return vld1q_u16(a);
8972 }
8973
8974 // CHECK-LABEL: @test_vld1q_u32(
8975 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
8976 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
8977 // CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
8978 // CHECK: ret <4 x i32> [[TMP2]]
test_vld1q_u32(uint32_t const * a)8979 uint32x4_t test_vld1q_u32(uint32_t const *a) {
8980 return vld1q_u32(a);
8981 }
8982
8983 // CHECK-LABEL: @test_vld1q_u64(
8984 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
8985 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
8986 // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
8987 // CHECK: ret <2 x i64> [[TMP2]]
test_vld1q_u64(uint64_t const * a)8988 uint64x2_t test_vld1q_u64(uint64_t const *a) {
8989 return vld1q_u64(a);
8990 }
8991
8992 // CHECK-LABEL: @test_vld1q_s8(
8993 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
8994 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
8995 // CHECK: ret <16 x i8> [[TMP1]]
test_vld1q_s8(int8_t const * a)8996 int8x16_t test_vld1q_s8(int8_t const *a) {
8997 return vld1q_s8(a);
8998 }
8999
9000 // CHECK-LABEL: @test_vld1q_s16(
9001 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9002 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9003 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9004 // CHECK: ret <8 x i16> [[TMP2]]
test_vld1q_s16(int16_t const * a)9005 int16x8_t test_vld1q_s16(int16_t const *a) {
9006 return vld1q_s16(a);
9007 }
9008
9009 // CHECK-LABEL: @test_vld1q_s32(
9010 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
9011 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
9012 // CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
9013 // CHECK: ret <4 x i32> [[TMP2]]
test_vld1q_s32(int32_t const * a)9014 int32x4_t test_vld1q_s32(int32_t const *a) {
9015 return vld1q_s32(a);
9016 }
9017
9018 // CHECK-LABEL: @test_vld1q_s64(
9019 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
9020 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
9021 // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
9022 // CHECK: ret <2 x i64> [[TMP2]]
test_vld1q_s64(int64_t const * a)9023 int64x2_t test_vld1q_s64(int64_t const *a) {
9024 return vld1q_s64(a);
9025 }
9026
9027 // CHECK-LABEL: @test_vld1q_f16(
9028 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
9029 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
9030 // CHECK: [[TMP2:%.*]] = load <8 x half>, <8 x half>* [[TMP1]]
9031 // CHECK: ret <8 x half> [[TMP2]]
test_vld1q_f16(float16_t const * a)9032 float16x8_t test_vld1q_f16(float16_t const *a) {
9033 return vld1q_f16(a);
9034 }
9035
9036 // CHECK-LABEL: @test_vld1q_f32(
9037 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
9038 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
9039 // CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
9040 // CHECK: ret <4 x float> [[TMP2]]
test_vld1q_f32(float32_t const * a)9041 float32x4_t test_vld1q_f32(float32_t const *a) {
9042 return vld1q_f32(a);
9043 }
9044
9045 // CHECK-LABEL: @test_vld1q_f64(
9046 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
9047 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
9048 // CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
9049 // CHECK: ret <2 x double> [[TMP2]]
test_vld1q_f64(float64_t const * a)9050 float64x2_t test_vld1q_f64(float64_t const *a) {
9051 return vld1q_f64(a);
9052 }
9053
9054 // CHECK-LABEL: @test_vld1q_p8(
9055 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
9056 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
9057 // CHECK: ret <16 x i8> [[TMP1]]
test_vld1q_p8(poly8_t const * a)9058 poly8x16_t test_vld1q_p8(poly8_t const *a) {
9059 return vld1q_p8(a);
9060 }
9061
9062 // CHECK-LABEL: @test_vld1q_p16(
9063 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9064 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
9065 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
9066 // CHECK: ret <8 x i16> [[TMP2]]
test_vld1q_p16(poly16_t const * a)9067 poly16x8_t test_vld1q_p16(poly16_t const *a) {
9068 return vld1q_p16(a);
9069 }
9070
9071 // CHECK-LABEL: @test_vld1_u8(
9072 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9073 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9074 // CHECK: ret <8 x i8> [[TMP1]]
test_vld1_u8(uint8_t const * a)9075 uint8x8_t test_vld1_u8(uint8_t const *a) {
9076 return vld1_u8(a);
9077 }
9078
9079 // CHECK-LABEL: @test_vld1_u16(
9080 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9081 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9082 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9083 // CHECK: ret <4 x i16> [[TMP2]]
test_vld1_u16(uint16_t const * a)9084 uint16x4_t test_vld1_u16(uint16_t const *a) {
9085 return vld1_u16(a);
9086 }
9087
9088 // CHECK-LABEL: @test_vld1_u32(
9089 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
9090 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
9091 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
9092 // CHECK: ret <2 x i32> [[TMP2]]
test_vld1_u32(uint32_t const * a)9093 uint32x2_t test_vld1_u32(uint32_t const *a) {
9094 return vld1_u32(a);
9095 }
9096
9097 // CHECK-LABEL: @test_vld1_u64(
9098 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
9099 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
9100 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
9101 // CHECK: ret <1 x i64> [[TMP2]]
test_vld1_u64(uint64_t const * a)9102 uint64x1_t test_vld1_u64(uint64_t const *a) {
9103 return vld1_u64(a);
9104 }
9105
9106 // CHECK-LABEL: @test_vld1_s8(
9107 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9108 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9109 // CHECK: ret <8 x i8> [[TMP1]]
test_vld1_s8(int8_t const * a)9110 int8x8_t test_vld1_s8(int8_t const *a) {
9111 return vld1_s8(a);
9112 }
9113
9114 // CHECK-LABEL: @test_vld1_s16(
9115 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9116 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9117 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9118 // CHECK: ret <4 x i16> [[TMP2]]
test_vld1_s16(int16_t const * a)9119 int16x4_t test_vld1_s16(int16_t const *a) {
9120 return vld1_s16(a);
9121 }
9122
9123 // CHECK-LABEL: @test_vld1_s32(
9124 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
9125 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
9126 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
9127 // CHECK: ret <2 x i32> [[TMP2]]
// Codegen probe: vld1_s32 must lower to a bitcast plus <2 x i32> load (see CHECK lines above).
test_vld1_s32(int32_t const * a)9128 int32x2_t test_vld1_s32(int32_t const *a) {
9129   return vld1_s32(a);
9130 }
9131
9132 // CHECK-LABEL: @test_vld1_s64(
9133 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
9134 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
9135 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
9136 // CHECK: ret <1 x i64> [[TMP2]]
// Codegen probe: vld1_s64 must lower to a bitcast plus <1 x i64> load (see CHECK lines above).
test_vld1_s64(int64_t const * a)9137 int64x1_t test_vld1_s64(int64_t const *a) {
9138   return vld1_s64(a);
9139 }
9140
9141 // CHECK-LABEL: @test_vld1_f16(
9142 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
9143 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
9144 // CHECK: [[TMP2:%.*]] = load <4 x half>, <4 x half>* [[TMP1]]
9145 // CHECK: ret <4 x half> [[TMP2]]
// Codegen probe: vld1_f16 must lower to a bitcast plus <4 x half> load (see CHECK lines above).
test_vld1_f16(float16_t const * a)9146 float16x4_t test_vld1_f16(float16_t const *a) {
9147   return vld1_f16(a);
9148 }
9149
9150 // CHECK-LABEL: @test_vld1_f32(
9151 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
9152 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
9153 // CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
9154 // CHECK: ret <2 x float> [[TMP2]]
// Codegen probe: vld1_f32 must lower to a bitcast plus <2 x float> load (see CHECK lines above).
test_vld1_f32(float32_t const * a)9155 float32x2_t test_vld1_f32(float32_t const *a) {
9156   return vld1_f32(a);
9157 }
9158
9159 // CHECK-LABEL: @test_vld1_f64(
9160 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
9161 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
9162 // CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
9163 // CHECK: ret <1 x double> [[TMP2]]
// Codegen probe: vld1_f64 must lower to a bitcast plus <1 x double> load (see CHECK lines above).
test_vld1_f64(float64_t const * a)9164 float64x1_t test_vld1_f64(float64_t const *a) {
9165   return vld1_f64(a);
9166 }
9167
9168 // CHECK-LABEL: @test_vld1_p8(
9169 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
9170 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
9171 // CHECK: ret <8 x i8> [[TMP1]]
// Codegen probe: vld1_p8 must lower to a direct <8 x i8> load (see CHECK lines above).
test_vld1_p8(poly8_t const * a)9172 poly8x8_t test_vld1_p8(poly8_t const *a) {
9173   return vld1_p8(a);
9174 }
9175
9176 // CHECK-LABEL: @test_vld1_p16(
9177 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
9178 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
9179 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
9180 // CHECK: ret <4 x i16> [[TMP2]]
// Codegen probe: vld1_p16 must lower to a bitcast plus <4 x i16> load (see CHECK lines above).
test_vld1_p16(poly16_t const * a)9181 poly16x4_t test_vld1_p16(poly16_t const *a) {
9182   return vld1_p16(a);
9183 }
9184
9185 // CHECK-LABEL: @test_vld2q_u8(
9186 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
9187 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
9188 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9189 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9190 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9191 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9192 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9193 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
9194 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
9195 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9196 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
9197 // CHECK: ret %struct.uint8x16x2_t [[TMP5]]
// Codegen probe: vld2q_u8 must lower to @llvm.aarch64.neon.ld2.v16i8 returning a two-vector struct (see CHECK lines above).
test_vld2q_u8(uint8_t const * a)9198 uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
9199   return vld2q_u8(a);
9200 }
9201
9202 // CHECK-LABEL: @test_vld2q_u16(
9203 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
9204 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
9205 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9206 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9207 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9208 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9209 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9210 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9211 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
9212 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
9213 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9214 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
9215 // CHECK: ret %struct.uint16x8x2_t [[TMP6]]
// Codegen probe: vld2q_u16 must lower to @llvm.aarch64.neon.ld2.v8i16 returning a two-vector struct (see CHECK lines above).
test_vld2q_u16(uint16_t const * a)9216 uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
9217   return vld2q_u16(a);
9218 }
9219
9220 // CHECK-LABEL: @test_vld2q_u32(
9221 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
9222 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
9223 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9224 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9225 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9226 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9227 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9228 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9229 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
9230 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
9231 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9232 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
9233 // CHECK: ret %struct.uint32x4x2_t [[TMP6]]
// Codegen probe: vld2q_u32 must lower to @llvm.aarch64.neon.ld2.v4i32 returning a two-vector struct (see CHECK lines above).
test_vld2q_u32(uint32_t const * a)9234 uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
9235   return vld2q_u32(a);
9236 }
9237
9238 // CHECK-LABEL: @test_vld2q_u64(
9239 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
9240 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
9241 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9242 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9243 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9244 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9245 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9246 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9247 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
9248 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
9249 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9250 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
9251 // CHECK: ret %struct.uint64x2x2_t [[TMP6]]
// Codegen probe: vld2q_u64 must lower to @llvm.aarch64.neon.ld2.v2i64 returning a two-vector struct (see CHECK lines above).
test_vld2q_u64(uint64_t const * a)9252 uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
9253   return vld2q_u64(a);
9254 }
9255
9256 // CHECK-LABEL: @test_vld2q_s8(
9257 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
9258 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
9259 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9260 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9261 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9262 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9263 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9264 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
9265 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
9266 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9267 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
9268 // CHECK: ret %struct.int8x16x2_t [[TMP5]]
// Codegen probe: vld2q_s8 must lower to @llvm.aarch64.neon.ld2.v16i8 returning a two-vector struct (see CHECK lines above).
test_vld2q_s8(int8_t const * a)9269 int8x16x2_t test_vld2q_s8(int8_t const *a) {
9270   return vld2q_s8(a);
9271 }
9272
9273 // CHECK-LABEL: @test_vld2q_s16(
9274 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
9275 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
9276 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9277 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9278 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9279 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9280 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9281 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9282 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
9283 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
9284 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9285 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
9286 // CHECK: ret %struct.int16x8x2_t [[TMP6]]
// Codegen probe: vld2q_s16 must lower to @llvm.aarch64.neon.ld2.v8i16 returning a two-vector struct (see CHECK lines above).
test_vld2q_s16(int16_t const * a)9287 int16x8x2_t test_vld2q_s16(int16_t const *a) {
9288   return vld2q_s16(a);
9289 }
9290
9291 // CHECK-LABEL: @test_vld2q_s32(
9292 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
9293 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
9294 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9295 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9296 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
9297 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
9298 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
9299 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
9300 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
9301 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
9302 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9303 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
9304 // CHECK: ret %struct.int32x4x2_t [[TMP6]]
// Codegen probe: vld2q_s32 must lower to @llvm.aarch64.neon.ld2.v4i32 returning a two-vector struct (see CHECK lines above).
test_vld2q_s32(int32_t const * a)9305 int32x4x2_t test_vld2q_s32(int32_t const *a) {
9306   return vld2q_s32(a);
9307 }
9308
9309 // CHECK-LABEL: @test_vld2q_s64(
9310 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
9311 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
9312 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9313 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9314 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
9315 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
9316 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
9317 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
9318 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
9319 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
9320 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9321 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
9322 // CHECK: ret %struct.int64x2x2_t [[TMP6]]
// Codegen probe: vld2q_s64 must lower to @llvm.aarch64.neon.ld2.v2i64 returning a two-vector struct (see CHECK lines above).
test_vld2q_s64(int64_t const * a)9323 int64x2x2_t test_vld2q_s64(int64_t const *a) {
9324   return vld2q_s64(a);
9325 }
9326
9327 // CHECK-LABEL: @test_vld2q_f16(
9328 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
9329 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
9330 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9331 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
9332 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
9333 // CHECK: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* [[TMP2]])
9334 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }*
9335 // CHECK: store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]]
9336 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
9337 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
9338 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9339 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
9340 // CHECK: ret %struct.float16x8x2_t [[TMP6]]
// Codegen probe: vld2q_f16 must lower to @llvm.aarch64.neon.ld2.v8f16 returning a two-vector struct (see CHECK lines above).
test_vld2q_f16(float16_t const * a)9341 float16x8x2_t test_vld2q_f16(float16_t const *a) {
9342   return vld2q_f16(a);
9343 }
9344
9345 // CHECK-LABEL: @test_vld2q_f32(
9346 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
9347 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
9348 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9349 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
9350 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
9351 // CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
9352 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
9353 // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
9354 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
9355 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
9356 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9357 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
9358 // CHECK: ret %struct.float32x4x2_t [[TMP6]]
// Codegen probe: vld2q_f32 must lower to @llvm.aarch64.neon.ld2.v4f32 returning a two-vector struct (see CHECK lines above).
test_vld2q_f32(float32_t const * a)9359 float32x4x2_t test_vld2q_f32(float32_t const *a) {
9360   return vld2q_f32(a);
9361 }
9362
9363 // CHECK-LABEL: @test_vld2q_f64(
9364 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
9365 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
9366 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9367 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
9368 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
9369 // CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
9370 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
9371 // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
9372 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
9373 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
9374 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9375 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
9376 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
// Codegen probe: vld2q_f64 must lower to @llvm.aarch64.neon.ld2.v2f64 returning a two-vector struct (see CHECK lines above).
test_vld2q_f64(float64_t const * a)9377 float64x2x2_t test_vld2q_f64(float64_t const *a) {
9378   return vld2q_f64(a);
9379 }
9380
9381 // CHECK-LABEL: @test_vld2q_p8(
9382 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
9383 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
9384 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9385 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
9386 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
9387 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
9388 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
9389 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
9390 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
9391 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false)
9392 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
9393 // CHECK: ret %struct.poly8x16x2_t [[TMP5]]
// Codegen probe: vld2q_p8 must lower to @llvm.aarch64.neon.ld2.v16i8 returning a two-vector struct (see CHECK lines above).
test_vld2q_p8(poly8_t const * a)9394 poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
9395   return vld2q_p8(a);
9396 }
9397
9398 // CHECK-LABEL: @test_vld2q_p16(
9399 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
9400 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
9401 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9402 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9403 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
9404 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
9405 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
9406 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
9407 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
9408 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
9409 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
9410 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
9411 // CHECK: ret %struct.poly16x8x2_t [[TMP6]]
// Codegen probe: vld2q_p16 must lower to @llvm.aarch64.neon.ld2.v8i16 returning a two-vector struct (see CHECK lines above).
test_vld2q_p16(poly16_t const * a)9412 poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
9413   return vld2q_p16(a);
9414 }
9415
9416 // CHECK-LABEL: @test_vld2_u8(
9417 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
9418 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
9419 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
9420 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9421 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9422 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9423 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9424 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
9425 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
9426 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9427 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
9428 // CHECK: ret %struct.uint8x8x2_t [[TMP5]]
// Codegen probe: vld2_u8 must lower to @llvm.aarch64.neon.ld2.v8i8 returning a two-vector struct (see CHECK lines above).
test_vld2_u8(uint8_t const * a)9429 uint8x8x2_t test_vld2_u8(uint8_t const *a) {
9430   return vld2_u8(a);
9431 }
9432
9433 // CHECK-LABEL: @test_vld2_u16(
9434 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
9435 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
9436 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
9437 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9438 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9439 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9440 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9441 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9442 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
9443 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
9444 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9445 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
9446 // CHECK: ret %struct.uint16x4x2_t [[TMP6]]
// Codegen probe: vld2_u16 must lower to @llvm.aarch64.neon.ld2.v4i16 returning a two-vector struct (see CHECK lines above).
test_vld2_u16(uint16_t const * a)9447 uint16x4x2_t test_vld2_u16(uint16_t const *a) {
9448   return vld2_u16(a);
9449 }
9450
9451 // CHECK-LABEL: @test_vld2_u32(
9452 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
9453 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
9454 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
9455 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9456 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
9457 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
9458 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
9459 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
9460 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
9461 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
9462 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9463 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
9464 // CHECK: ret %struct.uint32x2x2_t [[TMP6]]
// Codegen probe: vld2_u32 must lower to @llvm.aarch64.neon.ld2.v2i32 returning a two-vector struct (see CHECK lines above).
test_vld2_u32(uint32_t const * a)9465 uint32x2x2_t test_vld2_u32(uint32_t const *a) {
9466   return vld2_u32(a);
9467 }
9468
9469 // CHECK-LABEL: @test_vld2_u64(
9470 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
9471 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
9472 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
9473 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9474 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
9475 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
9476 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
9477 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
9478 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
9479 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
9480 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9481 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
9482 // CHECK: ret %struct.uint64x1x2_t [[TMP6]]
// Codegen probe: vld2_u64 must lower to @llvm.aarch64.neon.ld2.v1i64 returning a two-vector struct (see CHECK lines above).
test_vld2_u64(uint64_t const * a)9483 uint64x1x2_t test_vld2_u64(uint64_t const *a) {
9484   return vld2_u64(a);
9485 }
9486
9487 // CHECK-LABEL: @test_vld2_s8(
9488 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
9489 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
9490 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
9491 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
9492 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
9493 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
9494 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
9495 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
9496 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
9497 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
9498 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
9499 // CHECK: ret %struct.int8x8x2_t [[TMP5]]
// Codegen probe: vld2_s8 must lower to @llvm.aarch64.neon.ld2.v8i8 returning a two-vector struct (see CHECK lines above).
test_vld2_s8(int8_t const * a)9500 int8x8x2_t test_vld2_s8(int8_t const *a) {
9501   return vld2_s8(a);
9502 }
9503
9504 // CHECK-LABEL: @test_vld2_s16(
9505 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
9506 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
9507 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
9508 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
9509 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
9510 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
9511 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
9512 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
9513 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
9514 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
9515 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9516 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
9517 // CHECK: ret %struct.int16x4x2_t [[TMP6]]
// Codegen probe: vld2_s16 must lower to @llvm.aarch64.neon.ld2.v4i16 returning a two-vector struct (see CHECK lines above).
test_vld2_s16(int16_t const * a)9518 int16x4x2_t test_vld2_s16(int16_t const *a) {
9519   return vld2_s16(a);
9520 }
9521
9522 // CHECK-LABEL: @test_vld2_s32(
9523 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
9524 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
9525 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
9526 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
9527 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
9528 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
9529 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
9530 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
9531 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
9532 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
9533 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9534 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
9535 // CHECK: ret %struct.int32x2x2_t [[TMP6]]
// Codegen probe: vld2_s32 must lower to @llvm.aarch64.neon.ld2.v2i32 returning a two-vector struct (see CHECK lines above).
test_vld2_s32(int32_t const * a)9536 int32x2x2_t test_vld2_s32(int32_t const *a) {
9537   return vld2_s32(a);
9538 }
9539
9540 // CHECK-LABEL: @test_vld2_s64(
9541 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
9542 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
9543 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
9544 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
9545 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
9546 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
9547 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
9548 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
9549 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
9550 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
9551 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9552 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
9553 // CHECK: ret %struct.int64x1x2_t [[TMP6]]
// Codegen probe: vld2_s64 must lower to @llvm.aarch64.neon.ld2.v1i64 returning a two-vector struct (see CHECK lines above).
test_vld2_s64(int64_t const * a)9554 int64x1x2_t test_vld2_s64(int64_t const *a) {
9555   return vld2_s64(a);
9556 }
9557
9558 // CHECK-LABEL: @test_vld2_f16(
9559 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
9560 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
9561 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
9562 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
9563 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
9564 // CHECK: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* [[TMP2]])
9565 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }*
9566 // CHECK: store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]]
9567 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
9568 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
9569 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9570 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
9571 // CHECK: ret %struct.float16x4x2_t [[TMP6]]
// Codegen probe: vld2_f16 must lower to @llvm.aarch64.neon.ld2.v4f16 returning a two-vector struct (see CHECK lines above).
test_vld2_f16(float16_t const * a)9572 float16x4x2_t test_vld2_f16(float16_t const *a) {
9573   return vld2_f16(a);
9574 }
9575
9576 // CHECK-LABEL: @test_vld2_f32(
9577 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
9578 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
9579 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
9580 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
9581 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
9582 // CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
9583 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
9584 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
9585 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
9586 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
9587 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9588 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
9589 // CHECK: ret %struct.float32x2x2_t [[TMP6]]
// Codegen probe: vld2_f32 must lower to @llvm.aarch64.neon.ld2.v2f32 returning a two-vector struct (see CHECK lines above).
test_vld2_f32(float32_t const * a)9590 float32x2x2_t test_vld2_f32(float32_t const *a) {
9591   return vld2_f32(a);
9592 }
9593
9594 // CHECK-LABEL: @test_vld2_f64(
9595 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
9596 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
9597 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
9598 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
9599 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
9600 // CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
9601 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
9602 // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
9603 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
9604 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
9605 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
9606 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
9607 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
// Codegen probe: vld2_f64 must lower to @llvm.aarch64.neon.ld2.v1f64 returning a two-vector struct (see CHECK lines above).
test_vld2_f64(float64_t const * a)9608 float64x1x2_t test_vld2_f64(float64_t const *a) {
9609   return vld2_f64(a);
9610 }
9611
// CHECK-LABEL: @test_vld2_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x2_t [[TMP5]]
// De-interleaving 2-register load of <8 x i8> polynomial data via ld2.v8i8.
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
  return vld2_p8(a);
}
9628
// CHECK-LABEL: @test_vld2_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
// De-interleaving 2-register load of <4 x i16> polynomial data via ld2.v4i16.
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
  return vld2_p16(a);
}
9646
// CHECK-LABEL: @test_vld3q_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint8x16x3_t [[TMP5]]
// Quad-width de-interleaving 3-register load of <16 x i8> via ld3.v16i8.
uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
  return vld3q_u8(a);
}
9663
// CHECK-LABEL: @test_vld3q_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
// Quad-width de-interleaving 3-register load of <8 x i16> via ld3.v8i16.
uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
  return vld3q_u16(a);
}
9681
// CHECK-LABEL: @test_vld3q_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
// Quad-width de-interleaving 3-register load of <4 x i32> via ld3.v4i32.
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
  return vld3q_u32(a);
}
9699
// CHECK-LABEL: @test_vld3q_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
// Quad-width de-interleaving 3-register load of <2 x i64> via ld3.v2i64.
uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
  return vld3q_u64(a);
}
9717
// CHECK-LABEL: @test_vld3q_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x3_t [[TMP5]]
// Signed variant; emits the same ld3.v16i8 lowering as the unsigned test.
int8x16x3_t test_vld3q_s8(int8_t const *a) {
  return vld3q_s8(a);
}
9734
// CHECK-LABEL: @test_vld3q_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
// Signed variant; emits the same ld3.v8i16 lowering as the unsigned test.
int16x8x3_t test_vld3q_s16(int16_t const *a) {
  return vld3q_s16(a);
}
9752
// CHECK-LABEL: @test_vld3q_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
// Signed variant; emits the same ld3.v4i32 lowering as the unsigned test.
int32x4x3_t test_vld3q_s32(int32_t const *a) {
  return vld3q_s32(a);
}
9770
// CHECK-LABEL: @test_vld3q_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
// Signed variant; emits the same ld3.v2i64 lowering as the unsigned test.
int64x2x3_t test_vld3q_s64(int64_t const *a) {
  return vld3q_s64(a);
}
9788
// CHECK-LABEL: @test_vld3q_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
// CHECK:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }*
// CHECK:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x3_t [[TMP6]]
// Quad-width de-interleaving 3-register load of <8 x half> via ld3.v8f16.
float16x8x3_t test_vld3q_f16(float16_t const *a) {
  return vld3q_f16(a);
}
9806
// CHECK-LABEL: @test_vld3q_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x3_t [[TMP6]]
// Quad-width de-interleaving 3-register load of <4 x float> via ld3.v4f32.
float32x4x3_t test_vld3q_f32(float32_t const *a) {
  return vld3q_f32(a);
}
9824
// CHECK-LABEL: @test_vld3q_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x3_t [[TMP6]]
// Quad-width de-interleaving 3-register load of <2 x double> via ld3.v2f64 (AArch64-only).
float64x2x3_t test_vld3q_f64(float64_t const *a) {
  return vld3q_f64(a);
}
9842
// CHECK-LABEL: @test_vld3q_p8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x3_t [[TMP5]]
// Polynomial variant; emits the same ld3.v16i8 lowering as the integer tests.
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
  return vld3q_p8(a);
}
9859
// CHECK-LABEL: @test_vld3q_p16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
// Polynomial variant; emits the same ld3.v8i16 lowering as the integer tests.
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
  return vld3q_p16(a);
}
9877
// CHECK-LABEL: @test_vld3_u8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x3_t [[TMP5]]
// Double-word (64-bit) de-interleaving 3-register load of <8 x i8> via ld3.v8i8.
uint8x8x3_t test_vld3_u8(uint8_t const *a) {
  return vld3_u8(a);
}
9894
// CHECK-LABEL: @test_vld3_u16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
// Double-word de-interleaving 3-register load of <4 x i16> via ld3.v4i16.
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
  return vld3_u16(a);
}
9912
// CHECK-LABEL: @test_vld3_u32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
// Double-word de-interleaving 3-register load of <2 x i32> via ld3.v2i32.
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
  return vld3_u32(a);
}
9930
// CHECK-LABEL: @test_vld3_u64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
// Double-word de-interleaving 3-register load of <1 x i64> via ld3.v1i64.
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
  return vld3_u64(a);
}
9948
// CHECK-LABEL: @test_vld3_s8(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x3_t [[TMP5]]
// Signed variant; emits the same ld3.v8i8 lowering as the unsigned test.
int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}
9965
// CHECK-LABEL: @test_vld3_s16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
// Signed variant; emits the same ld3.v4i16 lowering as the unsigned test.
int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}
9983
// CHECK-LABEL: @test_vld3_s32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
// Signed variant; emits the same ld3.v2i32 lowering as the unsigned test.
int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}
10001
// CHECK-LABEL: @test_vld3_s64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
// Signed variant; emits the same ld3.v1i64 lowering as the unsigned test.
int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}
10019
// CHECK-LABEL: @test_vld3_f16(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
// CHECK:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }*
// CHECK:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
// Double-word de-interleaving 3-register load of <4 x half> via ld3.v4f16.
float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}
10037
// CHECK-LABEL: @test_vld3_f32(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
// Double-word de-interleaving 3-register load of <2 x float> via ld3.v2f32.
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}
10055
// CHECK-LABEL: @test_vld3_f64(
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
// Double-word de-interleaving 3-register load of <1 x double> via ld3.v1f64 (AArch64-only).
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}
10073
10074 // CHECK-LABEL: @test_vld3_p8(
10075 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
10076 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
10077 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
10078 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10079 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10080 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
10081 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10082 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
10083 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
10084 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false)
10085 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
10086 // CHECK: ret %struct.poly8x8x3_t [[TMP5]]
// Verifies vld3_p8 lowers to @llvm.aarch64.neon.ld3.v8i8 (see CHECK lines above).
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}
10090
10091 // CHECK-LABEL: @test_vld3_p16(
10092 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
10093 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
10094 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
10095 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10096 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10097 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10098 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10099 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10100 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
10101 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
10102 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
10103 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
10104 // CHECK: ret %struct.poly16x4x3_t [[TMP6]]
// Verifies vld3_p16 lowers to @llvm.aarch64.neon.ld3.v4i16 (see CHECK lines above).
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}
10108
10109 // CHECK-LABEL: @test_vld4q_u8(
10110 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
10111 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
10112 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10113 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10114 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10115 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10116 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10117 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
10118 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
10119 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10120 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
10121 // CHECK: ret %struct.uint8x16x4_t [[TMP5]]
// Verifies vld4q_u8 lowers to @llvm.aarch64.neon.ld4.v16i8 (see CHECK lines above).
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
  return vld4q_u8(a);
}
10125
10126 // CHECK-LABEL: @test_vld4q_u16(
10127 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
10128 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
10129 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10130 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10131 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10132 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10133 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10134 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10135 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
10136 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
10137 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10138 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
10139 // CHECK: ret %struct.uint16x8x4_t [[TMP6]]
// Verifies vld4q_u16 lowers to @llvm.aarch64.neon.ld4.v8i16 (see CHECK lines above).
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
  return vld4q_u16(a);
}
10143
10144 // CHECK-LABEL: @test_vld4q_u32(
10145 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
10146 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
10147 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
10148 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10149 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10150 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10151 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
10152 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10153 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
10154 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
10155 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10156 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
10157 // CHECK: ret %struct.uint32x4x4_t [[TMP6]]
// Verifies vld4q_u32 lowers to @llvm.aarch64.neon.ld4.v4i32 (see CHECK lines above).
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
  return vld4q_u32(a);
}
10161
10162 // CHECK-LABEL: @test_vld4q_u64(
10163 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
10164 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
10165 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
10166 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10167 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10168 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10169 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
10170 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10171 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
10172 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
10173 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10174 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
10175 // CHECK: ret %struct.uint64x2x4_t [[TMP6]]
// Verifies vld4q_u64 lowers to @llvm.aarch64.neon.ld4.v2i64 (see CHECK lines above).
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
  return vld4q_u64(a);
}
10179
10180 // CHECK-LABEL: @test_vld4q_s8(
10181 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
10182 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
10183 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
10184 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10185 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10186 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10187 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10188 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
10189 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
10190 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10191 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
10192 // CHECK: ret %struct.int8x16x4_t [[TMP5]]
// Verifies vld4q_s8 lowers to @llvm.aarch64.neon.ld4.v16i8 (see CHECK lines above).
int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}
10196
10197 // CHECK-LABEL: @test_vld4q_s16(
10198 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
10199 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
10200 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
10201 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10202 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10203 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10204 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10205 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10206 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
10207 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
10208 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10209 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
10210 // CHECK: ret %struct.int16x8x4_t [[TMP6]]
// Verifies vld4q_s16 lowers to @llvm.aarch64.neon.ld4.v8i16 (see CHECK lines above).
int16x8x4_t test_vld4q_s16(int16_t const *a) {
  return vld4q_s16(a);
}
10214
10215 // CHECK-LABEL: @test_vld4q_s32(
10216 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
10217 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
10218 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
10219 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10220 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10221 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10222 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
10223 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10224 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
10225 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
10226 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10227 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
10228 // CHECK: ret %struct.int32x4x4_t [[TMP6]]
// Verifies vld4q_s32 lowers to @llvm.aarch64.neon.ld4.v4i32 (see CHECK lines above).
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}
10232
10233 // CHECK-LABEL: @test_vld4q_s64(
10234 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
10235 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
10236 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
10237 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10238 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10239 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10240 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
10241 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10242 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
10243 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
10244 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10245 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
10246 // CHECK: ret %struct.int64x2x4_t [[TMP6]]
// Verifies vld4q_s64 lowers to @llvm.aarch64.neon.ld4.v2i64 (see CHECK lines above).
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}
10250
10251 // CHECK-LABEL: @test_vld4q_f16(
10252 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
10253 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
10254 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
10255 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10256 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x half>*
10257 // CHECK: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* [[TMP2]])
10258 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }*
10259 // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]]
10260 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
10261 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
10262 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10263 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
10264 // CHECK: ret %struct.float16x8x4_t [[TMP6]]
// Verifies vld4q_f16 lowers to @llvm.aarch64.neon.ld4.v8f16 (see CHECK lines above).
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}
10268
10269 // CHECK-LABEL: @test_vld4q_f32(
10270 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
10271 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
10272 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
10273 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
10274 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
10275 // CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
10276 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
10277 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
10278 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
10279 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
10280 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10281 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
10282 // CHECK: ret %struct.float32x4x4_t [[TMP6]]
// Verifies vld4q_f32 lowers to @llvm.aarch64.neon.ld4.v4f32 (see CHECK lines above).
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}
10286
10287 // CHECK-LABEL: @test_vld4q_f64(
10288 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
10289 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
10290 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
10291 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
10292 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
10293 // CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
10294 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
10295 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
10296 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
10297 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
10298 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10299 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
10300 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
// Verifies vld4q_f64 lowers to @llvm.aarch64.neon.ld4.v2f64 (see CHECK lines above).
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}
10304
10305 // CHECK-LABEL: @test_vld4q_p8(
10306 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
10307 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
10308 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
10309 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10310 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10311 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
10312 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10313 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
10314 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
10315 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false)
10316 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
10317 // CHECK: ret %struct.poly8x16x4_t [[TMP5]]
// Verifies vld4q_p8 lowers to @llvm.aarch64.neon.ld4.v16i8 (see CHECK lines above).
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}
10321
10322 // CHECK-LABEL: @test_vld4q_p16(
10323 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
10324 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
10325 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
10326 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10327 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10328 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10329 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
10330 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10331 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
10332 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
10333 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
10334 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
10335 // CHECK: ret %struct.poly16x8x4_t [[TMP6]]
// Verifies vld4q_p16 lowers to @llvm.aarch64.neon.ld4.v8i16 (see CHECK lines above).
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}
10339
10340 // CHECK-LABEL: @test_vld4_u8(
10341 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
10342 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
10343 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10344 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10345 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10346 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10347 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10348 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
10349 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
10350 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10351 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
10352 // CHECK: ret %struct.uint8x8x4_t [[TMP5]]
// Verifies vld4_u8 lowers to @llvm.aarch64.neon.ld4.v8i8 (see CHECK lines above).
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}
10356
10357 // CHECK-LABEL: @test_vld4_u16(
10358 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
10359 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
10360 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10361 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10362 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10363 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10364 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10365 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10366 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
10367 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
10368 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10369 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
10370 // CHECK: ret %struct.uint16x4x4_t [[TMP6]]
// Verifies vld4_u16 lowers to @llvm.aarch64.neon.ld4.v4i16 (see CHECK lines above).
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}
10374
10375 // CHECK-LABEL: @test_vld4_u32(
10376 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
10377 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
10378 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10379 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10380 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10381 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10382 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10383 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10384 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
10385 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
10386 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10387 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
10388 // CHECK: ret %struct.uint32x2x4_t [[TMP6]]
// Verifies vld4_u32 lowers to @llvm.aarch64.neon.ld4.v2i32 (see CHECK lines above).
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}
10392
10393 // CHECK-LABEL: @test_vld4_u64(
10394 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
10395 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
10396 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10397 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10398 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10399 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10400 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10401 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10402 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
10403 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
10404 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10405 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
10406 // CHECK: ret %struct.uint64x1x4_t [[TMP6]]
// Verifies vld4_u64 lowers to @llvm.aarch64.neon.ld4.v1i64 (see CHECK lines above).
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}
10410
10411 // CHECK-LABEL: @test_vld4_s8(
10412 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
10413 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
10414 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10415 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10416 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10417 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10418 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10419 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
10420 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
10421 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10422 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
10423 // CHECK: ret %struct.int8x8x4_t [[TMP5]]
// Verifies vld4_s8 lowers to @llvm.aarch64.neon.ld4.v8i8 (see CHECK lines above).
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}
10427
10428 // CHECK-LABEL: @test_vld4_s16(
10429 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
10430 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
10431 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10432 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10433 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10434 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10435 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10436 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10437 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
10438 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
10439 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10440 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
10441 // CHECK: ret %struct.int16x4x4_t [[TMP6]]
// Verifies vld4_s16 lowers to @llvm.aarch64.neon.ld4.v4i16 (see CHECK lines above).
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}
10445
10446 // CHECK-LABEL: @test_vld4_s32(
10447 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
10448 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
10449 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10450 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
10451 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10452 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10453 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
10454 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10455 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
10456 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
10457 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10458 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
10459 // CHECK: ret %struct.int32x2x4_t [[TMP6]]
// Verifies vld4_s32 lowers to @llvm.aarch64.neon.ld4.v2i32 (see CHECK lines above).
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}
10463
10464 // CHECK-LABEL: @test_vld4_s64(
10465 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
10466 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
10467 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10468 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
10469 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10470 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10471 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
10472 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10473 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
10474 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
10475 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10476 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
10477 // CHECK: ret %struct.int64x1x4_t [[TMP6]]
// Verifies vld4_s64 lowers to @llvm.aarch64.neon.ld4.v1i64 (see CHECK lines above).
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}
10481
10482 // CHECK-LABEL: @test_vld4_f16(
10483 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
10484 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
10485 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10486 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
10487 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x half>*
10488 // CHECK: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* [[TMP2]])
10489 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }*
10490 // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]]
10491 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
10492 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
10493 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10494 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
10495 // CHECK: ret %struct.float16x4x4_t [[TMP6]]
// Verifies vld4_f16 lowers to @llvm.aarch64.neon.ld4.v4f16 (see CHECK lines above).
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}
10499
10500 // CHECK-LABEL: @test_vld4_f32(
10501 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
10502 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
10503 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
10504 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
10505 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
10506 // CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
10507 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
10508 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
10509 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
10510 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
10511 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10512 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
10513 // CHECK: ret %struct.float32x2x4_t [[TMP6]]
// Verifies vld4_f32 lowers to @llvm.aarch64.neon.ld4.v2f32 (see CHECK lines above).
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}
10517
10518 // CHECK-LABEL: @test_vld4_f64(
10519 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
10520 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
10521 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
10522 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
10523 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
10524 // CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
10525 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
10526 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
10527 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
10528 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
10529 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10530 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
10531 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
test_vld4_f64(float64_t const * a)10532 float64x1x4_t test_vld4_f64(float64_t const *a) {
10533 return vld4_f64(a);
10534 }
10535
10536 // CHECK-LABEL: @test_vld4_p8(
10537 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
10538 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
10539 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
10540 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10541 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10542 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
10543 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10544 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
10545 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
10546 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false)
10547 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
10548 // CHECK: ret %struct.poly8x8x4_t [[TMP5]]
test_vld4_p8(poly8_t const * a)10549 poly8x8x4_t test_vld4_p8(poly8_t const *a) {
10550 return vld4_p8(a);
10551 }
10552
10553 // CHECK-LABEL: @test_vld4_p16(
10554 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
10555 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
10556 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
10557 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
10558 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10559 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10560 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
10561 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10562 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
10563 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
10564 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
10565 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
10566 // CHECK: ret %struct.poly16x4x4_t [[TMP6]]
test_vld4_p16(poly16_t const * a)10567 poly16x4x4_t test_vld4_p16(poly16_t const *a) {
10568 return vld4_p16(a);
10569 }
10570
10571 // CHECK-LABEL: @test_vst1q_u8(
10572 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10573 // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
10574 // CHECK: ret void
test_vst1q_u8(uint8_t * a,uint8x16_t b)10575 void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
10576 vst1q_u8(a, b);
10577 }
10578
10579 // CHECK-LABEL: @test_vst1q_u16(
10580 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10581 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10582 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10583 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10584 // CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10585 // CHECK: ret void
test_vst1q_u16(uint16_t * a,uint16x8_t b)10586 void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
10587 vst1q_u16(a, b);
10588 }
10589
10590 // CHECK-LABEL: @test_vst1q_u32(
10591 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
10592 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10593 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
10594 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10595 // CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
10596 // CHECK: ret void
test_vst1q_u32(uint32_t * a,uint32x4_t b)10597 void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
10598 vst1q_u32(a, b);
10599 }
10600
10601 // CHECK-LABEL: @test_vst1q_u64(
10602 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
10603 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10604 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
10605 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10606 // CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
10607 // CHECK: ret void
test_vst1q_u64(uint64_t * a,uint64x2_t b)10608 void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
10609 vst1q_u64(a, b);
10610 }
10611
10612 // CHECK-LABEL: @test_vst1q_s8(
10613 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10614 // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
10615 // CHECK: ret void
test_vst1q_s8(int8_t * a,int8x16_t b)10616 void test_vst1q_s8(int8_t *a, int8x16_t b) {
10617 vst1q_s8(a, b);
10618 }
10619
10620 // CHECK-LABEL: @test_vst1q_s16(
10621 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10622 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10623 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10624 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10625 // CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10626 // CHECK: ret void
test_vst1q_s16(int16_t * a,int16x8_t b)10627 void test_vst1q_s16(int16_t *a, int16x8_t b) {
10628 vst1q_s16(a, b);
10629 }
10630
10631 // CHECK-LABEL: @test_vst1q_s32(
10632 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
10633 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10634 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
10635 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10636 // CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
10637 // CHECK: ret void
test_vst1q_s32(int32_t * a,int32x4_t b)10638 void test_vst1q_s32(int32_t *a, int32x4_t b) {
10639 vst1q_s32(a, b);
10640 }
10641
10642 // CHECK-LABEL: @test_vst1q_s64(
10643 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
10644 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10645 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
10646 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10647 // CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
10648 // CHECK: ret void
test_vst1q_s64(int64_t * a,int64x2_t b)10649 void test_vst1q_s64(int64_t *a, int64x2_t b) {
10650 vst1q_s64(a, b);
10651 }
10652
10653 // CHECK-LABEL: @test_vst1q_f16(
10654 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
10655 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
10656 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
10657 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
10658 // CHECK: store <8 x half> [[TMP3]], <8 x half>* [[TMP2]]
10659 // CHECK: ret void
test_vst1q_f16(float16_t * a,float16x8_t b)10660 void test_vst1q_f16(float16_t *a, float16x8_t b) {
10661 vst1q_f16(a, b);
10662 }
10663
10664 // CHECK-LABEL: @test_vst1q_f32(
10665 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
10666 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
10667 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
10668 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
10669 // CHECK: store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
10670 // CHECK: ret void
test_vst1q_f32(float32_t * a,float32x4_t b)10671 void test_vst1q_f32(float32_t *a, float32x4_t b) {
10672 vst1q_f32(a, b);
10673 }
10674
10675 // CHECK-LABEL: @test_vst1q_f64(
10676 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
10677 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
10678 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
10679 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
10680 // CHECK: store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
10681 // CHECK: ret void
test_vst1q_f64(float64_t * a,float64x2_t b)10682 void test_vst1q_f64(float64_t *a, float64x2_t b) {
10683 vst1q_f64(a, b);
10684 }
10685
10686 // CHECK-LABEL: @test_vst1q_p8(
10687 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
10688 // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
10689 // CHECK: ret void
test_vst1q_p8(poly8_t * a,poly8x16_t b)10690 void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
10691 vst1q_p8(a, b);
10692 }
10693
10694 // CHECK-LABEL: @test_vst1q_p16(
10695 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10696 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10697 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
10698 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10699 // CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
10700 // CHECK: ret void
test_vst1q_p16(poly16_t * a,poly16x8_t b)10701 void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
10702 vst1q_p16(a, b);
10703 }
10704
10705 // CHECK-LABEL: @test_vst1_u8(
10706 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10707 // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
10708 // CHECK: ret void
test_vst1_u8(uint8_t * a,uint8x8_t b)10709 void test_vst1_u8(uint8_t *a, uint8x8_t b) {
10710 vst1_u8(a, b);
10711 }
10712
10713 // CHECK-LABEL: @test_vst1_u16(
10714 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10715 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10716 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10717 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10718 // CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10719 // CHECK: ret void
test_vst1_u16(uint16_t * a,uint16x4_t b)10720 void test_vst1_u16(uint16_t *a, uint16x4_t b) {
10721 vst1_u16(a, b);
10722 }
10723
10724 // CHECK-LABEL: @test_vst1_u32(
10725 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
10726 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10727 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10728 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10729 // CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
10730 // CHECK: ret void
test_vst1_u32(uint32_t * a,uint32x2_t b)10731 void test_vst1_u32(uint32_t *a, uint32x2_t b) {
10732 vst1_u32(a, b);
10733 }
10734
10735 // CHECK-LABEL: @test_vst1_u64(
10736 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
10737 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10738 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10739 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10740 // CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
10741 // CHECK: ret void
test_vst1_u64(uint64_t * a,uint64x1_t b)10742 void test_vst1_u64(uint64_t *a, uint64x1_t b) {
10743 vst1_u64(a, b);
10744 }
10745
10746 // CHECK-LABEL: @test_vst1_s8(
10747 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10748 // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
10749 // CHECK: ret void
test_vst1_s8(int8_t * a,int8x8_t b)10750 void test_vst1_s8(int8_t *a, int8x8_t b) {
10751 vst1_s8(a, b);
10752 }
10753
10754 // CHECK-LABEL: @test_vst1_s16(
10755 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10756 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10757 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10758 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10759 // CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10760 // CHECK: ret void
test_vst1_s16(int16_t * a,int16x4_t b)10761 void test_vst1_s16(int16_t *a, int16x4_t b) {
10762 vst1_s16(a, b);
10763 }
10764
10765 // CHECK-LABEL: @test_vst1_s32(
10766 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
10767 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10768 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
10769 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10770 // CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
10771 // CHECK: ret void
test_vst1_s32(int32_t * a,int32x2_t b)10772 void test_vst1_s32(int32_t *a, int32x2_t b) {
10773 vst1_s32(a, b);
10774 }
10775
10776 // CHECK-LABEL: @test_vst1_s64(
10777 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
10778 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10779 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
10780 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10781 // CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
10782 // CHECK: ret void
test_vst1_s64(int64_t * a,int64x1_t b)10783 void test_vst1_s64(int64_t *a, int64x1_t b) {
10784 vst1_s64(a, b);
10785 }
10786
10787 // CHECK-LABEL: @test_vst1_f16(
10788 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
10789 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
10790 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
10791 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
10792 // CHECK: store <4 x half> [[TMP3]], <4 x half>* [[TMP2]]
10793 // CHECK: ret void
test_vst1_f16(float16_t * a,float16x4_t b)10794 void test_vst1_f16(float16_t *a, float16x4_t b) {
10795 vst1_f16(a, b);
10796 }
10797
10798 // CHECK-LABEL: @test_vst1_f32(
10799 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
10800 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
10801 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
10802 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
10803 // CHECK: store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
10804 // CHECK: ret void
test_vst1_f32(float32_t * a,float32x2_t b)10805 void test_vst1_f32(float32_t *a, float32x2_t b) {
10806 vst1_f32(a, b);
10807 }
10808
10809 // CHECK-LABEL: @test_vst1_f64(
10810 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
10811 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
10812 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
10813 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
10814 // CHECK: store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
10815 // CHECK: ret void
test_vst1_f64(float64_t * a,float64x1_t b)10816 void test_vst1_f64(float64_t *a, float64x1_t b) {
10817 vst1_f64(a, b);
10818 }
10819
10820 // CHECK-LABEL: @test_vst1_p8(
10821 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
10822 // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
10823 // CHECK: ret void
test_vst1_p8(poly8_t * a,poly8x8_t b)10824 void test_vst1_p8(poly8_t *a, poly8x8_t b) {
10825 vst1_p8(a, b);
10826 }
10827
10828 // CHECK-LABEL: @test_vst1_p16(
10829 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
10830 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10831 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
10832 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10833 // CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
10834 // CHECK: ret void
test_vst1_p16(poly16_t * a,poly16x4_t b)10835 void test_vst1_p16(poly16_t *a, poly16x4_t b) {
10836 vst1_p16(a, b);
10837 }
10838
10839 // CHECK-LABEL: @test_vst2q_u8(
10840 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
10841 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
10842 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
10843 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10844 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
10845 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
10846 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10847 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10848 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10849 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10850 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
10851 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10852 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10853 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10854 // CHECK: ret void
test_vst2q_u8(uint8_t * a,uint8x16x2_t b)10855 void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
10856 vst2q_u8(a, b);
10857 }
10858
10859 // CHECK-LABEL: @test_vst2q_u16(
10860 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
10861 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
10862 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
10863 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10864 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
10865 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
10866 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10867 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
10868 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10869 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10870 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10871 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10872 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
10873 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10874 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10875 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10876 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10877 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10878 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10879 // CHECK: ret void
test_vst2q_u16(uint16_t * a,uint16x8x2_t b)10880 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
10881 vst2q_u16(a, b);
10882 }
10883
10884 // CHECK-LABEL: @test_vst2q_u32(
10885 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
10886 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
10887 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
10888 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10889 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
10890 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
10891 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10892 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
10893 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10894 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
10895 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
10896 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10897 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
10898 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
10899 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
10900 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10901 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10902 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10903 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
10904 // CHECK: ret void
test_vst2q_u32(uint32_t * a,uint32x4x2_t b)10905 void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
10906 vst2q_u32(a, b);
10907 }
10908
10909 // CHECK-LABEL: @test_vst2q_u64(
10910 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
10911 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
10912 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
10913 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
10914 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
10915 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
10916 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10917 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
10918 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10919 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
10920 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
10921 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10922 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
10923 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
10924 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
10925 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10926 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10927 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10928 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
10929 // CHECK: ret void
test_vst2q_u64(uint64_t * a,uint64x2x2_t b)10930 void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
10931 vst2q_u64(a, b);
10932 }
10933
10934 // CHECK-LABEL: @test_vst2q_s8(
10935 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
10936 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
10937 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
10938 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
10939 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
10940 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
10941 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10942 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10943 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
10944 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
10945 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
10946 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
10947 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
10948 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
10949 // CHECK: ret void
test_vst2q_s8(int8_t * a,int8x16x2_t b)10950 void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
10951 vst2q_s8(a, b);
10952 }
10953
10954 // CHECK-LABEL: @test_vst2q_s16(
10955 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
10956 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
10957 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
10958 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
10959 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
10960 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
10961 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10962 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
10963 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10964 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
10965 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
10966 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10967 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
10968 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
10969 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
10970 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10971 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10972 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10973 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
10974 // CHECK: ret void
test_vst2q_s16(int16_t * a,int16x8x2_t b)10975 void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
10976 vst2q_s16(a, b);
10977 }
10978
10979 // CHECK-LABEL: @test_vst2q_s32(
10980 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
10981 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
10982 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
10983 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
10984 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
10985 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
10986 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
10987 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
10988 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
10989 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
10990 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
10991 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10992 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
10993 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
10994 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
10995 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10996 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10997 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10998 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
10999 // CHECK: ret void
test_vst2q_s32(int32_t * a,int32x4x2_t b)11000 void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
11001 vst2q_s32(a, b);
11002 }
11003
11004 // CHECK-LABEL: @test_vst2q_s64(
11005 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
11006 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
11007 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
11008 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
11009 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
11010 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
11011 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11012 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11013 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11014 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
11015 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11016 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11017 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11018 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11019 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11020 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11021 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11022 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11023 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
11024 // CHECK: ret void
test_vst2q_s64(int64_t * a,int64x2x2_t b)11025 void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
11026 vst2q_s64(a, b);
11027 }
11028
11029 // CHECK-LABEL: @test_vst2q_f16(
11030 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
11031 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
11032 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
11033 // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
11034 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
11035 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
11036 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11037 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
11038 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11039 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
11040 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11041 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11042 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11043 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
11044 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11045 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11046 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11047 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11048 // CHECK: call void @llvm.aarch64.neon.st2.v8f16.p0i8(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i8* [[TMP2]])
11049 // CHECK: ret void
test_vst2q_f16(float16_t * a,float16x8x2_t b)11050 void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
11051 vst2q_f16(a, b);
11052 }
11053
11054 // CHECK-LABEL: @test_vst2q_f32(
11055 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
11056 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
11057 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
11058 // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
11059 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
11060 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
11061 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11062 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
11063 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11064 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
11065 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11066 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11067 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11068 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
11069 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11070 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11071 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11072 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11073 // CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
11074 // CHECK: ret void
// IR-lowering check only: vst2q_f32 must emit @llvm.aarch64.neon.st2.v4f32
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}
11078
11079 // CHECK-LABEL: @test_vst2q_f64(
11080 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
11081 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
11082 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
11083 // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
11084 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
11085 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
11086 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11087 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
11088 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11089 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
11090 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11091 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11092 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
11093 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
11094 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11095 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11096 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11097 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11098 // CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
11099 // CHECK: ret void
// IR-lowering check only: vst2q_f64 must emit @llvm.aarch64.neon.st2.v2f64
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}
11103
11104 // CHECK-LABEL: @test_vst2q_p8(
11105 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
11106 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
11107 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
11108 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11109 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
11110 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
11111 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11112 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11113 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11114 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11115 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
11116 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11117 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11118 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11119 // CHECK: ret void
// IR-lowering check only: vst2q_p8 must emit @llvm.aarch64.neon.st2.v16i8
// storing straight through the i8* argument (see CHECK lines above).
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}
11123
11124 // CHECK-LABEL: @test_vst2q_p16(
11125 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
11126 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
11127 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
11128 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11129 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
11130 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
11131 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
11132 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11133 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11134 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11135 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11136 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11137 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
11138 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11139 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11140 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11141 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11142 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11143 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11144 // CHECK: ret void
// IR-lowering check only: vst2q_p16 must emit @llvm.aarch64.neon.st2.v8i16
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}
11148
11149 // CHECK-LABEL: @test_vst2_u8(
11150 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
11151 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
11152 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
11153 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11154 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
11155 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
11156 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11157 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
11158 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11159 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11160 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
11161 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11162 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11163 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11164 // CHECK: ret void
// IR-lowering check only: vst2_u8 must emit @llvm.aarch64.neon.st2.v8i8
// storing straight through the i8* argument (see CHECK lines above).
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
11168
11169 // CHECK-LABEL: @test_vst2_u16(
11170 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
11171 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
11172 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
11173 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
11174 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
11175 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
11176 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11177 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11178 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
11179 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
11180 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11181 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11182 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
11183 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11184 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11185 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11186 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11187 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11188 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
11189 // CHECK: ret void
// IR-lowering check only: vst2_u16 must emit @llvm.aarch64.neon.st2.v4i16
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}
11193
11194 // CHECK-LABEL: @test_vst2_u32(
11195 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
11196 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
11197 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
11198 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
11199 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
11200 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
11201 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11202 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11203 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
11204 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
11205 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11206 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11207 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
11208 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11209 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11210 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11211 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11212 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11213 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
11214 // CHECK: ret void
// IR-lowering check only: vst2_u32 must emit @llvm.aarch64.neon.st2.v2i32
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}
11218
11219 // CHECK-LABEL: @test_vst2_u64(
11220 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
11221 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
11222 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
11223 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
11224 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
11225 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
11226 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11227 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11228 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
11229 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
11230 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11231 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11232 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
11233 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11234 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11235 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11236 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11237 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11238 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
11239 // CHECK: ret void
// IR-lowering check only: vst2_u64 must emit @llvm.aarch64.neon.st2.v1i64
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}
11243
11244 // CHECK-LABEL: @test_vst2_s8(
11245 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
11246 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
11247 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
11248 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11249 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
11250 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
11251 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11252 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
11253 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11254 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11255 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
11256 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11257 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11258 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11259 // CHECK: ret void
// IR-lowering check only: vst2_s8 must emit @llvm.aarch64.neon.st2.v8i8 —
// same intrinsic as the u8 variant, since st2 is sign-agnostic at the IR level.
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}
11263
11264 // CHECK-LABEL: @test_vst2_s16(
11265 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
11266 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
11267 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
11268 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
11269 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
11270 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
11271 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11272 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11273 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
11274 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
11275 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11276 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11277 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
11278 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11279 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11280 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11281 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11282 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11283 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
11284 // CHECK: ret void
// IR-lowering check only: vst2_s16 must emit @llvm.aarch64.neon.st2.v4i16
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}
11288
11289 // CHECK-LABEL: @test_vst2_s32(
11290 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
11291 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
11292 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
11293 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
11294 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
11295 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
11296 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11297 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11298 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
11299 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
11300 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
11301 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11302 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
11303 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
11304 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
11305 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11306 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11307 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11308 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
11309 // CHECK: ret void
// IR-lowering check only: vst2_s32 must emit @llvm.aarch64.neon.st2.v2i32
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}
11313
11314 // CHECK-LABEL: @test_vst2_s64(
11315 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
11316 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
11317 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
11318 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
11319 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
11320 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
11321 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11322 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11323 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
11324 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
11325 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
11326 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11327 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
11328 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
11329 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
11330 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11331 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11332 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11333 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
11334 // CHECK: ret void
// IR-lowering check only: vst2_s64 must emit @llvm.aarch64.neon.st2.v1i64
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}
11338
11339 // CHECK-LABEL: @test_vst2_f16(
11340 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
11341 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
11342 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
11343 // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
11344 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
11345 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
11346 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11347 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
11348 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
11349 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
11350 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
11351 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
11352 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
11353 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
11354 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
11355 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
11356 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
11357 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
11358 // CHECK: call void @llvm.aarch64.neon.st2.v4f16.p0i8(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i8* [[TMP2]])
11359 // CHECK: ret void
// IR-lowering check only: vst2_f16 must emit @llvm.aarch64.neon.st2.v4f16
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}
11363
11364 // CHECK-LABEL: @test_vst2_f32(
11365 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
11366 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
11367 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
11368 // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
11369 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
11370 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
11371 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11372 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
11373 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
11374 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
11375 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
11376 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
11377 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
11378 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
11379 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
11380 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
11381 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
11382 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
11383 // CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
11384 // CHECK: ret void
// IR-lowering check only: vst2_f32 must emit @llvm.aarch64.neon.st2.v2f32
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}
11388
11389 // CHECK-LABEL: @test_vst2_f64(
11390 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
11391 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
11392 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
11393 // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
11394 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
11395 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
11396 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11397 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
11398 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
11399 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
11400 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
11401 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
11402 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
11403 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
11404 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
11405 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
11406 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
11407 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
11408 // CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
11409 // CHECK: ret void
// IR-lowering check only: vst2_f64 must emit @llvm.aarch64.neon.st2.v1f64
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}
11413
11414 // CHECK-LABEL: @test_vst2_p8(
11415 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
11416 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
11417 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
11418 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
11419 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
11420 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
11421 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11422 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
11423 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
11424 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11425 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
11426 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11427 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11428 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
11429 // CHECK: ret void
// IR-lowering check only: vst2_p8 must emit @llvm.aarch64.neon.st2.v8i8
// storing straight through the i8* argument (see CHECK lines above).
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}
11433
11434 // CHECK-LABEL: @test_vst2_p16(
11435 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
11436 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
11437 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
11438 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
11439 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
11440 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
11441 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
11442 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11443 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
11444 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
11445 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11446 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11447 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
11448 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11449 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11450 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11451 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11452 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11453 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
11454 // CHECK: ret void
// IR-lowering check only: vst2_p16 must emit @llvm.aarch64.neon.st2.v4i16
// (exact IR sequence is pinned by the CHECK lines above; do not alter the body).
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}
11458
// vst3q_u8: expect one @llvm.aarch64.neon.st3.v16i8 call; the [3 x <16 x i8>]-coerced struct
// argument is spilled and memcpy'd, then its three vectors are reloaded for the intrinsic.
// NOTE(review): CHECK lines look machine-generated -- regenerate rather than hand-edit.
11459 // CHECK-LABEL: @test_vst3q_u8(
11460 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
11461 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
11462 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
11463 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11464 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
11465 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
11466 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11467 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11468 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11469 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11470 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11471 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11472 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11473 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
11474 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11475 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11476 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11477 // CHECK: ret void
test_vst3q_u8(uint8_t * a,uint8x16x3_t b)11478 void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
11479   vst3q_u8(a, b);
11480 }
11481
// vst3q_u16 -> @llvm.aarch64.neon.st3.v8i16; pointer arg is bitcast to i8*, each <8 x i16>
// element is round-tripped through <16 x i8> bitcasts before the intrinsic call.
11482 // CHECK-LABEL: @test_vst3q_u16(
11483 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
11484 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
11485 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
11486 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11487 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
11488 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
11489 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11490 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11491 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11492 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11493 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11494 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11495 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11496 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11497 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11498 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11499 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
11500 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11501 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11502 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11503 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11504 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11505 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11506 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11507 // CHECK: ret void
test_vst3q_u16(uint16_t * a,uint16x8x3_t b)11508 void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
11509   vst3q_u16(a, b);
11510 }
11511
// vst3q_u32 -> @llvm.aarch64.neon.st3.v4i32; same spill/memcpy/reload pattern with
// <4 x i32> <-> <16 x i8> bitcasts.
11512 // CHECK-LABEL: @test_vst3q_u32(
11513 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
11514 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
11515 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
11516 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
11517 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
11518 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
11519 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11520 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11521 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11522 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
11523 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11524 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11525 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11526 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11527 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11528 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11529 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
11530 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
11531 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
11532 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11533 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11534 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11535 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11536 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
11537 // CHECK: ret void
test_vst3q_u32(uint32_t * a,uint32x4x3_t b)11538 void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
11539   vst3q_u32(a, b);
11540 }
11541
// vst3q_u64 -> @llvm.aarch64.neon.st3.v2i64; same spill/memcpy/reload pattern with
// <2 x i64> <-> <16 x i8> bitcasts.
11542 // CHECK-LABEL: @test_vst3q_u64(
11543 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
11544 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
11545 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
11546 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
11547 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
11548 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
11549 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11550 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11551 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11552 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
11553 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11554 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11555 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11556 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11557 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11558 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11559 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
11560 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
11561 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
11562 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11563 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11564 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11565 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11566 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
11567 // CHECK: ret void
test_vst3q_u64(uint64_t * a,uint64x2x3_t b)11568 void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
11569   vst3q_u64(a, b);
11570 }
11571
// vst3q_s8: identical IR shape to the u8 variant (element type is i8 either way) --
// one @llvm.aarch64.neon.st3.v16i8 call, no pointer bitcast needed for an i8* destination.
11572 // CHECK-LABEL: @test_vst3q_s8(
11573 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
11574 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
11575 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
11576 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11577 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
11578 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
11579 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11580 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11581 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11582 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11583 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11584 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11585 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11586 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
11587 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11588 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11589 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11590 // CHECK: ret void
test_vst3q_s8(int8_t * a,int8x16x3_t b)11591 void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
11592   vst3q_s8(a, b);
11593 }
11594
// vst3q_s16 -> @llvm.aarch64.neon.st3.v8i16; mirrors the u16 test with int16x8x3_t.
11595 // CHECK-LABEL: @test_vst3q_s16(
11596 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
11597 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
11598 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
11599 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11600 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
11601 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
11602 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11603 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11604 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11605 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11606 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11607 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11608 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11609 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11610 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11611 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11612 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
11613 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11614 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11615 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11616 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11617 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11618 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11619 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11620 // CHECK: ret void
test_vst3q_s16(int16_t * a,int16x8x3_t b)11621 void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
11622   vst3q_s16(a, b);
11623 }
11624
// vst3q_s32 -> @llvm.aarch64.neon.st3.v4i32; mirrors the u32 test with int32x4x3_t.
11625 // CHECK-LABEL: @test_vst3q_s32(
11626 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
11627 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
11628 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
11629 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
11630 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
11631 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
11632 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11633 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
11634 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11635 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
11636 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11637 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11638 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11639 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11640 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11641 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11642 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
11643 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
11644 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
11645 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11646 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11647 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11648 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11649 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
11650 // CHECK: ret void
test_vst3q_s32(int32_t * a,int32x4x3_t b)11651 void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
11652   vst3q_s32(a, b);
11653 }
11654
// vst3q_s64 -> @llvm.aarch64.neon.st3.v2i64; mirrors the u64 test with int64x2x3_t.
11655 // CHECK-LABEL: @test_vst3q_s64(
11656 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
11657 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
11658 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
11659 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
11660 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
11661 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
11662 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11663 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
11664 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11665 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
11666 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11667 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11668 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11669 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11670 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11671 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11672 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
11673 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
11674 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
11675 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11676 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11677 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11678 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11679 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
11680 // CHECK: ret void
test_vst3q_s64(int64_t * a,int64x2x3_t b)11681 void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
11682   vst3q_s64(a, b);
11683 }
11684
// vst3q_f16 -> @llvm.aarch64.neon.st3.v8f16; half-precision variant (requires the
// -fallow-half-arguments-and-returns RUN-line flag), <8 x half> round-tripped via <16 x i8>.
11685 // CHECK-LABEL: @test_vst3q_f16(
11686 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
11687 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
11688 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
11689 // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
11690 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
11691 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
11692 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11693 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
11694 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11695 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
11696 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11697 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11698 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11699 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
11700 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11701 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11702 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
11703 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
11704 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
11705 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11706 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11707 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11708 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11709 // CHECK: call void @llvm.aarch64.neon.st3.v8f16.p0i8(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i8* [[TMP2]])
11710 // CHECK: ret void
test_vst3q_f16(float16_t * a,float16x8x3_t b)11711 void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
11712   vst3q_f16(a, b);
11713 }
11714
// vst3q_f32 -> @llvm.aarch64.neon.st3.v4f32; <4 x float> round-tripped via <16 x i8>.
11715 // CHECK-LABEL: @test_vst3q_f32(
11716 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
11717 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
11718 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
11719 // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
11720 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
11721 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
11722 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11723 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
11724 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11725 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
11726 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11727 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11728 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11729 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
11730 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
11731 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11732 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
11733 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
11734 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
11735 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11736 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11737 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11738 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11739 // CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
11740 // CHECK: ret void
test_vst3q_f32(float32_t * a,float32x4x3_t b)11741 void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
11742   vst3q_f32(a, b);
11743 }
11744
// vst3q_f64 -> @llvm.aarch64.neon.st3.v2f64; AArch64-only double-precision variant.
11745 // CHECK-LABEL: @test_vst3q_f64(
11746 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
11747 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
11748 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
11749 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
11750 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
11751 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
11752 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11753 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
11754 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11755 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
11756 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
11757 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11758 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11759 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
11760 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
11761 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11762 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
11763 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
11764 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
11765 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11766 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11767 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11768 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11769 // CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
11770 // CHECK: ret void
test_vst3q_f64(float64_t * a,float64x2x3_t b)11771 void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
11772   vst3q_f64(a, b);
11773 }
11774
// vst3q_p8: polynomial-i8 variant; IR is the same shape as test_vst3q_u8/s8
// (one @llvm.aarch64.neon.st3.v16i8 call, i8* destination used directly).
11775 // CHECK-LABEL: @test_vst3q_p8(
11776 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
11777 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
11778 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
11779 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
11780 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
11781 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
11782 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11783 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11784 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
11785 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11786 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11787 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11788 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11789 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
11790 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
11791 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
11792 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
11793 // CHECK: ret void
test_vst3q_p8(poly8_t * a,poly8x16x3_t b)11794 void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
11795   vst3q_p8(a, b);
11796 }
11797
// vst3q_p16 -> @llvm.aarch64.neon.st3.v8i16; polynomial-i16 variant, same IR as u16/s16.
11798 // CHECK-LABEL: @test_vst3q_p16(
11799 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
11800 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
11801 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
11802 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
11803 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
11804 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
11805 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
11806 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11807 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11808 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
11809 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11810 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11811 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11812 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11813 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11814 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11815 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
11816 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
11817 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
11818 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11819 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11820 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11821 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11822 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
11823 // CHECK: ret void
test_vst3q_p16(poly16_t * a,poly16x8x3_t b)11824 void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
11825   vst3q_p16(a, b);
11826 }
11827
// vst3_u8 (64-bit D-register form) -> @llvm.aarch64.neon.st3.v8i8; note 8-byte
// alignment and a 24-byte memcpy, vs. 16/48 for the q-register variants above.
11828 // CHECK-LABEL: @test_vst3_u8(
11829 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
11830 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
11831 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
11832 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
11833 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
11834 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
11835 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11836 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11837 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
11838 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
11839 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11840 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
11841 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
11842 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
11843 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
11844 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
11845 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
11846 // CHECK: ret void
test_vst3_u8(uint8_t * a,uint8x8x3_t b)11847 void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
11848   vst3_u8(a, b);
11849 }
11850
// vst3_u16 (D-register form) -> @llvm.aarch64.neon.st3.v4i16; <4 x i16> elements
// round-tripped via <8 x i8>, 8-byte alignment, 24-byte argument copy.
11851 // CHECK-LABEL: @test_vst3_u16(
11852 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
11853 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
11854 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
11855 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
11856 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
11857 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
11858 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
11859 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
11860 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11861 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
11862 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
11863 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11864 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11865 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
11866 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
11867 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11868 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
11869 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
11870 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
11871 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11872 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11873 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11874 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11875 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
11876 // CHECK: ret void
test_vst3_u16(uint16_t * a,uint16x4x3_t b)11877 void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
11878   vst3_u16(a, b);
11879 }
11880
// CHECK-LABEL: @test_vst3_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_u32 lowers to @llvm.aarch64.neon.st3.v2i32 (CHECK lines above).
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}
11910
// CHECK-LABEL: @test_vst3_u64(
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_u64 lowers to @llvm.aarch64.neon.st3.v1i64 (CHECK lines above).
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}
11940
// CHECK-LABEL: @test_vst3_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
// Verifies vst3_s8 lowers to @llvm.aarch64.neon.st3.v8i8; no element bitcasts needed for i8.
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}
11963
// CHECK-LABEL: @test_vst3_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_s16 lowers to @llvm.aarch64.neon.st3.v4i16 (CHECK lines above).
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}
11993
// CHECK-LABEL: @test_vst3_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_s32 lowers to @llvm.aarch64.neon.st3.v2i32 (CHECK lines above).
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}
12023
// CHECK-LABEL: @test_vst3_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_s64 lowers to @llvm.aarch64.neon.st3.v1i64 (CHECK lines above).
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}
12053
// CHECK-LABEL: @test_vst3_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK: call void @llvm.aarch64.neon.st3.v4f16.p0i8(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_f16 lowers to @llvm.aarch64.neon.st3.v4f16 (CHECK lines above).
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}
12083
// CHECK-LABEL: @test_vst3_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_f32 lowers to @llvm.aarch64.neon.st3.v2f32 (CHECK lines above).
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}
12113
// CHECK-LABEL: @test_vst3_f64(
// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_f64 lowers to @llvm.aarch64.neon.st3.v1f64 (CHECK lines above).
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}
12143
// CHECK-LABEL: @test_vst3_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
// Verifies vst3_p8 lowers to @llvm.aarch64.neon.st3.v8i8; no element bitcasts needed for i8.
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
12166
// CHECK-LABEL: @test_vst3_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst3_p16 lowers to @llvm.aarch64.neon.st3.v4i16 (CHECK lines above).
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
12196
// CHECK-LABEL: @test_vst4q_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
// Verifies vst4q_u8 lowers to @llvm.aarch64.neon.st4.v16i8; no element bitcasts needed for i8.
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
12222
// CHECK-LABEL: @test_vst4q_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
// Verifies vst4q_u16 lowers to @llvm.aarch64.neon.st4.v8i16 (CHECK lines above).
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}
12257
12258 // CHECK-LABEL: @test_vst4q_u32(
12259 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
12260 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
12261 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
12262 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12263 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
12264 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
12265 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12266 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12267 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12268 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12269 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12270 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12271 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12272 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12273 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12274 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12275 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12276 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12277 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12278 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12279 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
12280 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12281 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12282 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12283 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12284 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12285 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12286 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12287 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12288 // CHECK: ret void
// Checks that vst4q_u32 lowers to a single @llvm.aarch64.neon.st4.v4i32 call
// storing all four <4 x i32> members of b through a (IR pinned by the CHECK
// lines above).
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}
12292
12293 // CHECK-LABEL: @test_vst4q_u64(
12294 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
12295 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
12296 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
12297 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12298 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
12299 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
12300 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12301 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12302 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12303 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12304 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12305 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12306 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12307 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12308 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12309 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12310 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12311 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12312 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12313 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12314 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
12315 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12316 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12317 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12318 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12319 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12320 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12321 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12322 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12323 // CHECK: ret void
// Checks that vst4q_u64 lowers to a single @llvm.aarch64.neon.st4.v2i64 call
// storing all four <2 x i64> members of b through a (IR pinned by the CHECK
// lines above).
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}
12327
12328 // CHECK-LABEL: @test_vst4q_s8(
12329 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
12330 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
12331 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
12332 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12333 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
12334 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
12335 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12336 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12337 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12338 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12339 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12340 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12341 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12342 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12343 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12344 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12345 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
12346 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12347 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12348 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12349 // CHECK: ret void
// Checks that vst4q_s8 lowers to a single @llvm.aarch64.neon.st4.v16i8 call;
// the i8 element type needs no bitcasts, so %a is passed straight through
// (IR pinned by the CHECK lines above).
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}
12353
12354 // CHECK-LABEL: @test_vst4q_s16(
12355 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
12356 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
12357 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
12358 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12359 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
12360 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
12361 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12362 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12363 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12364 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12365 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12366 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12367 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12368 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12369 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12370 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12371 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12372 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12373 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12374 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12375 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
12376 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12377 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12378 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12379 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12380 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12381 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12382 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12383 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12384 // CHECK: ret void
// Checks that vst4q_s16 lowers to a single @llvm.aarch64.neon.st4.v8i16 call
// storing all four <8 x i16> members of b through a (IR pinned by the CHECK
// lines above).
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}
12388
12389 // CHECK-LABEL: @test_vst4q_s32(
12390 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
12391 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
12392 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
12393 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
12394 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
12395 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
12396 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12397 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12398 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12399 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
12400 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
12401 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
12402 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12403 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
12404 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
12405 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
12406 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12407 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
12408 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
12409 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
12410 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
12411 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
12412 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
12413 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
12414 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
12415 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
12416 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
12417 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
12418 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
12419 // CHECK: ret void
// Checks that vst4q_s32 lowers to a single @llvm.aarch64.neon.st4.v4i32 call
// storing all four <4 x i32> members of b through a (IR pinned by the CHECK
// lines above).
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}
12423
12424 // CHECK-LABEL: @test_vst4q_s64(
12425 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
12426 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
12427 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
12428 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
12429 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
12430 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
12431 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12432 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12433 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12434 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
12435 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
12436 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12437 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12438 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
12439 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
12440 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12441 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12442 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
12443 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
12444 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12445 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
12446 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
12447 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
12448 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12449 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12450 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12451 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12452 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12453 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
12454 // CHECK: ret void
// Checks that vst4q_s64 lowers to a single @llvm.aarch64.neon.st4.v2i64 call
// storing all four <2 x i64> members of b through a (IR pinned by the CHECK
// lines above).
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}
12458
12459 // CHECK-LABEL: @test_vst4q_f16(
12460 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
12461 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
12462 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
12463 // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
12464 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
12465 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
12466 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12467 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
12468 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12469 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
12470 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
12471 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
12472 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12473 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
12474 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
12475 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
12476 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12477 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
12478 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
12479 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
12480 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
12481 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
12482 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
12483 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
12484 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
12485 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
12486 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
12487 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
12488 // CHECK: call void @llvm.aarch64.neon.st4.v8f16.p0i8(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i8* [[TMP2]])
12489 // CHECK: ret void
// Checks that vst4q_f16 lowers to a single @llvm.aarch64.neon.st4.v8f16 call
// storing all four <8 x half> members of b through a; relies on the
// -fallow-half-arguments-and-returns RUN flag (IR pinned by the CHECK lines
// above).
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}
12493
12494 // CHECK-LABEL: @test_vst4q_f32(
12495 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
12496 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
12497 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
12498 // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
12499 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
12500 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
12501 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12502 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
12503 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12504 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
12505 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
12506 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
12507 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12508 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
12509 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12510 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12511 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12512 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
12513 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
12514 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
12515 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
12516 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
12517 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
12518 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
12519 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12520 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12521 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
12522 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
12523 // CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
12524 // CHECK: ret void
// Checks that vst4q_f32 lowers to a single @llvm.aarch64.neon.st4.v4f32 call
// storing all four <4 x float> members of b through a (IR pinned by the CHECK
// lines above).
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}
12528
12529 // CHECK-LABEL: @test_vst4q_f64(
12530 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
12531 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
12532 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
12533 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
12534 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
12535 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
12536 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12537 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
12538 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12539 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
12540 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12541 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12542 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12543 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
12544 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12545 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12546 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12547 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
12548 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
12549 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12550 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
12551 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
12552 // CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
12553 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
12554 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12555 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12556 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12557 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
12558 // CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
12559 // CHECK: ret void
// Checks that vst4q_f64 lowers to a single @llvm.aarch64.neon.st4.v2f64 call
// storing all four <2 x double> members of b through a (IR pinned by the
// CHECK lines above).
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}
12563
12564 // CHECK-LABEL: @test_vst4q_p8(
12565 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
12566 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
12567 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
12568 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
12569 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
12570 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
12571 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12572 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12573 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
12574 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
12575 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12576 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
12577 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
12578 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12579 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
12580 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
12581 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
12582 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
12583 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
12584 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
12585 // CHECK: ret void
// Checks that vst4q_p8 lowers to a single @llvm.aarch64.neon.st4.v16i8 call;
// like the s8 variant, no element bitcasts are needed and %a is passed
// straight through (IR pinned by the CHECK lines above).
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}
12589
12590 // CHECK-LABEL: @test_vst4q_p16(
12591 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
12592 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
12593 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
12594 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
12595 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
12596 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
12597 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
12598 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12599 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12600 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
12601 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
12602 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
12603 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12604 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
12605 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
12606 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
12607 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12608 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
12609 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
12610 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
12611 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
12612 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
12613 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
12614 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
12615 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
12616 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
12617 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
12618 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
12619 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
12620 // CHECK: ret void
// Checks that vst4q_p16 lowers to a single @llvm.aarch64.neon.st4.v8i16 call
// storing all four <8 x i16> members of b through a (IR pinned by the CHECK
// lines above).
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}
12624
12625 // CHECK-LABEL: @test_vst4_u8(
12626 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
12627 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
12628 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
12629 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12630 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
12631 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
12632 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12633 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12634 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12635 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12636 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12637 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12638 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12639 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12640 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12641 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12642 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
12643 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12644 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12645 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12646 // CHECK: ret void
// Checks that the 64-bit (D-register) vst4_u8 lowers to a single
// @llvm.aarch64.neon.st4.v8i8 call; note the 8-byte alignment and 32-byte
// memcpy in the CHECK lines above, versus 16/64 for the q-register variants.
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}
12650
12651 // CHECK-LABEL: @test_vst4_u16(
12652 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
12653 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
12654 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
12655 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12656 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
12657 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
12658 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12659 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12660 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12661 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12662 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12663 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12664 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12665 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12666 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12667 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12668 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12669 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12670 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12671 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12672 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
12673 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12674 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12675 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12676 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12677 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12678 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12679 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12680 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12681 // CHECK: ret void
test_vst4_u16(uint16_t * a,uint16x4x4_t b)12682 void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
12683 vst4_u16(a, b);
12684 }
12685
12686 // CHECK-LABEL: @test_vst4_u32(
12687 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
12688 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
12689 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
12690 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12691 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
12692 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
12693 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12694 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12695 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12696 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12697 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12698 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12699 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12700 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12701 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12702 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12703 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12704 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12705 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12706 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12707 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
12708 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12709 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12710 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12711 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12712 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12713 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12714 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12715 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12716 // CHECK: ret void
test_vst4_u32(uint32_t * a,uint32x2x4_t b)12717 void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
12718 vst4_u32(a, b);
12719 }
12720
12721 // CHECK-LABEL: @test_vst4_u64(
12722 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
12723 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
12724 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
12725 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12726 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
12727 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
12728 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12729 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12730 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12731 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12732 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12733 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12734 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12735 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12736 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12737 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12738 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12739 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12740 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12741 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12742 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
12743 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12744 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12745 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12746 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12747 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12748 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12749 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12750 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12751 // CHECK: ret void
test_vst4_u64(uint64_t * a,uint64x1x4_t b)12752 void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
12753 vst4_u64(a, b);
12754 }
12755
12756 // CHECK-LABEL: @test_vst4_s8(
12757 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
12758 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
12759 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
12760 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12761 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
12762 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
12763 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12764 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12765 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
12766 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12767 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12768 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12769 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12770 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12771 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12772 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12773 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
12774 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
12775 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
12776 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
12777 // CHECK: ret void
test_vst4_s8(int8_t * a,int8x8x4_t b)12778 void test_vst4_s8(int8_t *a, int8x8x4_t b) {
12779 vst4_s8(a, b);
12780 }
12781
12782 // CHECK-LABEL: @test_vst4_s16(
12783 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
12784 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
12785 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
12786 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
12787 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
12788 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
12789 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12790 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
12791 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12792 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
12793 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12794 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12795 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12796 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12797 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12798 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12799 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12800 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12801 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12802 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12803 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
12804 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
12805 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
12806 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12807 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12808 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12809 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12810 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12811 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
12812 // CHECK: ret void
test_vst4_s16(int16_t * a,int16x4x4_t b)12813 void test_vst4_s16(int16_t *a, int16x4x4_t b) {
12814 vst4_s16(a, b);
12815 }
12816
12817 // CHECK-LABEL: @test_vst4_s32(
12818 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
12819 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
12820 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
12821 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
12822 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
12823 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
12824 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12825 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
12826 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12827 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
12828 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12829 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12830 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12831 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12832 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12833 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12834 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12835 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12836 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12837 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12838 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
12839 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
12840 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
12841 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12842 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12843 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12844 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12845 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12846 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
12847 // CHECK: ret void
test_vst4_s32(int32_t * a,int32x2x4_t b)12848 void test_vst4_s32(int32_t *a, int32x2x4_t b) {
12849 vst4_s32(a, b);
12850 }
12851
12852 // CHECK-LABEL: @test_vst4_s64(
12853 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
12854 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
12855 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
12856 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
12857 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
12858 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
12859 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12860 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
12861 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12862 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
12863 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12864 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12865 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12866 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12867 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12868 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12869 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12870 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12871 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12872 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12873 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
12874 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
12875 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
12876 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12877 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12878 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12879 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12880 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12881 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
12882 // CHECK: ret void
test_vst4_s64(int64_t * a,int64x1x4_t b)12883 void test_vst4_s64(int64_t *a, int64x1x4_t b) {
12884 vst4_s64(a, b);
12885 }
12886
12887 // CHECK-LABEL: @test_vst4_f16(
12888 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
12889 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
12890 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
12891 // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
12892 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
12893 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
12894 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12895 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
12896 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12897 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
12898 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12899 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12900 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12901 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
12902 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
12903 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12904 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12905 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
12906 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
12907 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12908 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
12909 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
12910 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
12911 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
12912 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12913 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12914 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12915 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
12916 // CHECK: call void @llvm.aarch64.neon.st4.v4f16.p0i8(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i8* [[TMP2]])
12917 // CHECK: ret void
test_vst4_f16(float16_t * a,float16x4x4_t b)12918 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
12919 vst4_f16(a, b);
12920 }
12921
12922 // CHECK-LABEL: @test_vst4_f32(
12923 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
12924 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
12925 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
12926 // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
12927 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
12928 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
12929 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12930 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
12931 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12932 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
12933 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
12934 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12935 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12936 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
12937 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
12938 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12939 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12940 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
12941 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
12942 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12943 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
12944 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
12945 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
12946 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
12947 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12948 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12949 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12950 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
12951 // CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
12952 // CHECK: ret void
test_vst4_f32(float32_t * a,float32x2x4_t b)12953 void test_vst4_f32(float32_t *a, float32x2x4_t b) {
12954 vst4_f32(a, b);
12955 }
12956
12957 // CHECK-LABEL: @test_vst4_f64(
12958 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
12959 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
12960 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
12961 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
12962 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
12963 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
12964 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
12965 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
12966 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12967 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
12968 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
12969 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12970 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12971 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
12972 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
12973 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12974 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12975 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
12976 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
12977 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12978 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
12979 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
12980 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
12981 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
12982 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12983 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12984 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12985 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
12986 // CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
12987 // CHECK: ret void
test_vst4_f64(float64_t * a,float64x1x4_t b)12988 void test_vst4_f64(float64_t *a, float64x1x4_t b) {
12989 vst4_f64(a, b);
12990 }
12991
12992 // CHECK-LABEL: @test_vst4_p8(
12993 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
12994 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
12995 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
12996 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
12997 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
12998 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
12999 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13000 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13001 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13002 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13003 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13004 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13005 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13006 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13007 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13008 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13009 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13010 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13011 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13012 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13013 // CHECK: ret void
test_vst4_p8(poly8_t * a,poly8x8x4_t b)13014 void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
13015 vst4_p8(a, b);
13016 }
13017
13018 // CHECK-LABEL: @test_vst4_p16(
13019 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
13020 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
13021 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
13022 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13023 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
13024 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
13025 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13026 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
13027 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13028 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13029 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13030 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13031 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13032 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13033 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13034 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13035 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13036 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13037 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13038 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13039 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13040 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13041 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13042 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13043 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13044 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13045 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13046 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13047 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13048 // CHECK: ret void
// Exercises vst4_p16; the CHECK lines above verify it lowers to a single
// call of @llvm.aarch64.neon.st4.v4i16 on the four <4 x i16> struct members.
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_p16(a, b);
}
13052
13053 // CHECK-LABEL: @test_vld1q_f64_x2(
13054 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
13055 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
13056 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13057 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13058 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13059 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
13060 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
13061 // CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
13062 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
13063 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
13064 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13065 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
13066 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
// Exercises vld1q_f64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x2.v2f64 and returns the loaded pair by value.
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
  return vld1q_f64_x2(a);
}
13070
13071 // CHECK-LABEL: @test_vld1q_p64_x2(
13072 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
13073 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
13074 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13075 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13076 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13077 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
13078 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
13079 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
13080 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
13081 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
13082 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
13083 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
13084 // CHECK: ret %struct.poly64x2x2_t [[TMP6]]
// Exercises vld1q_p64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x2.v2i64 and returns the loaded pair by value.
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
  return vld1q_p64_x2(a);
}
13088
13089 // CHECK-LABEL: @test_vld1_f64_x2(
13090 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
13091 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
13092 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13093 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13094 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13095 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
13096 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
13097 // CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
13098 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
13099 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
13100 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13101 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
13102 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
// Exercises vld1_f64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x2.v1f64 (64-bit D-register variant).
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}
13106
13107 // CHECK-LABEL: @test_vld1_p64_x2(
13108 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
13109 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
13110 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13111 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13112 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13113 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
13114 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
13115 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
13116 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
13117 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
13118 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
13119 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
13120 // CHECK: ret %struct.poly64x1x2_t [[TMP6]]
// Exercises vld1_p64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x2.v1i64 (64-bit D-register variant).
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}
13124
13125 // CHECK-LABEL: @test_vld1q_f64_x3(
13126 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
13127 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
13128 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13129 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13130 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13131 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
13132 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
13133 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13134 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
13135 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
13136 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13137 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
13138 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
// Exercises vld1q_f64_x3; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x3.v2f64 and returns the three-vector struct.
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}
13142
13143 // CHECK-LABEL: @test_vld1q_p64_x3(
13144 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
13145 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
13146 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13147 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13148 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13149 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
13150 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
13151 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13152 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
13153 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
13154 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
13155 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
13156 // CHECK: ret %struct.poly64x2x3_t [[TMP6]]
// Exercises vld1q_p64_x3; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x3.v2i64 and returns the three-vector struct.
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
  return vld1q_p64_x3(a);
}
13160
13161 // CHECK-LABEL: @test_vld1_f64_x3(
13162 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
13163 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
13164 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13165 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13166 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13167 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
13168 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
13169 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13170 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
13171 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
13172 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13173 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
13174 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
// Exercises vld1_f64_x3; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x3.v1f64 (64-bit D-register variant).
float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
  return vld1_f64_x3(a);
}
13178
13179 // CHECK-LABEL: @test_vld1_p64_x3(
13180 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
13181 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
13182 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13183 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13184 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13185 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
13186 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
13187 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13188 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
13189 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
13190 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
13191 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
13192 // CHECK: ret %struct.poly64x1x3_t [[TMP6]]
// Exercises vld1_p64_x3; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x3.v1i64 (64-bit D-register variant).
poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
  return vld1_p64_x3(a);
}
13196
13197 // CHECK-LABEL: @test_vld1q_f64_x4(
13198 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
13199 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
13200 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13201 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13202 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13203 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
13204 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
13205 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
13206 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
13207 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
13208 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13209 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
13210 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
// Exercises vld1q_f64_x4; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x4.v2f64 and returns the four-vector struct.
float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
  return vld1q_f64_x4(a);
}
13214
13215 // CHECK-LABEL: @test_vld1q_p64_x4(
13216 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
13217 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
13218 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13219 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13220 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13221 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
13222 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
13223 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
13224 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
13225 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
13226 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
13227 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
13228 // CHECK: ret %struct.poly64x2x4_t [[TMP6]]
// Exercises vld1q_p64_x4; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x4.v2i64 and returns the four-vector struct.
poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
  return vld1q_p64_x4(a);
}
13232
13233 // CHECK-LABEL: @test_vld1_f64_x4(
13234 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
13235 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
13236 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13237 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
13238 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
13239 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
13240 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
13241 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
13242 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
13243 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
13244 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13245 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
13246 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
// Exercises vld1_f64_x4; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x4.v1f64 (64-bit D-register variant).
float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
  return vld1_f64_x4(a);
}
13250
13251 // CHECK-LABEL: @test_vld1_p64_x4(
13252 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
13253 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
13254 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13255 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
13256 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
13257 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
13258 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
13259 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
13260 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
13261 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
13262 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
13263 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
13264 // CHECK: ret %struct.poly64x1x4_t [[TMP6]]
// Exercises vld1_p64_x4; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.ld1x4.v1i64 (64-bit D-register variant).
poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
  return vld1_p64_x4(a);
}
13268
13269 // CHECK-LABEL: @test_vst1q_f64_x2(
13270 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
13271 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
13272 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
13273 // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
13274 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
13275 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
13276 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
13277 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13278 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
13279 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
13280 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13281 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13282 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
13283 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
13284 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13285 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13286 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13287 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13288 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
13289 // CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
13290 // CHECK: ret void
// Exercises vst1q_f64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.st1x2.v2f64 storing both <2 x double> members to *a.
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}
13294
13295 // CHECK-LABEL: @test_vst1q_p64_x2(
13296 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
13297 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
13298 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
13299 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
13300 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
13301 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
13302 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
13303 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13304 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
13305 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
13306 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13307 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13308 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
13309 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13310 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13311 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13312 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13313 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13314 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
13315 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
13316 // CHECK: ret void
// Exercises vst1q_p64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.st1x2.v2i64 storing both <2 x i64> members to *a.
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}
13320
13321 // CHECK-LABEL: @test_vst1_f64_x2(
13322 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
13323 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
13324 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
13325 // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
13326 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
13327 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
13328 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
13329 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13330 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
13331 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
13332 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13333 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13334 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
13335 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
13336 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13337 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13338 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13339 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13340 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
13341 // CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
13342 // CHECK: ret void
// Exercises vst1_f64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.st1x2.v1f64 (64-bit D-register variant).
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}
13346
13347 // CHECK-LABEL: @test_vst1_p64_x2(
13348 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
13349 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
13350 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
13351 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
13352 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
13353 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
13354 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
13355 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13356 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
13357 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
13358 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13359 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13360 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
13361 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13362 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13363 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13364 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13365 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13366 // CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
13367 // CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
13368 // CHECK: ret void
// Exercises vst1_p64_x2; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.st1x2.v1i64 (64-bit D-register variant).
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}
13372
13373 // CHECK-LABEL: @test_vst1q_f64_x3(
13374 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
13375 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
13376 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
13377 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
13378 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
13379 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
13380 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
13381 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13382 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13383 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
13384 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13385 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13386 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13387 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
13388 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13389 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13390 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
13391 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
13392 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13393 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13394 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13395 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13396 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13397 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
13398 // CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
13399 // CHECK: ret void
// Exercises vst1q_f64_x3; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.st1x3.v2f64 storing all three <2 x double> members.
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}
13403
13404 // CHECK-LABEL: @test_vst1q_p64_x3(
13405 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
13406 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
13407 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
13408 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
13409 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
13410 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
13411 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
13412 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13413 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13414 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
13415 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13416 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13417 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13418 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13419 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13420 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13421 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
13422 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13423 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13424 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13425 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13426 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13427 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13428 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
13429 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
13430 // CHECK: ret void
// Exercises vst1q_p64_x3; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.st1x3.v2i64 storing all three <2 x i64> members.
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}
13434
13435 // CHECK-LABEL: @test_vst1_f64_x3(
13436 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
13437 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
13438 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
13439 // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
13440 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
13441 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
13442 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
13443 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13444 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13445 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
13446 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13447 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13448 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13449 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
13450 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13451 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13452 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13453 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
13454 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13455 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13456 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13457 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13458 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13459 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
13460 // CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
13461 // CHECK: ret void
// Exercises vst1_f64_x3; the CHECK lines above verify it lowers to
// @llvm.aarch64.neon.st1x3.v1f64 (64-bit D-register variant).
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}
13465
13466 // CHECK-LABEL: @test_vst1_p64_x3(
13467 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
13468 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
13469 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
13470 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
13471 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
13472 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
13473 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
13474 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13475 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13476 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
13477 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13478 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13479 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13480 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13481 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13482 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13483 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
13484 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13485 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13486 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13487 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13488 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13489 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13490 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
13491 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
13492 // CHECK: ret void
// Codegen test: expected IR (aarch64.neon.st1x3.v1i64) is pinned by the CHECK lines above.
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}
13496
13497 // CHECK-LABEL: @test_vst1q_f64_x4(
13498 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
13499 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
13500 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
13501 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
13502 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
13503 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
13504 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
13505 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13506 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13507 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
13508 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13509 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13510 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13511 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
13512 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13513 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13514 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13515 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
13516 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13517 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13518 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13519 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
13520 // CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
13521 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
13522 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13523 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13524 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13525 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
13526 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
13527 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
13528 // CHECK: ret void
// Codegen test: expected IR (aarch64.neon.st1x4.v2f64) is pinned by the CHECK lines above.
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
  vst1q_f64_x4(a, b);
}
13532
13533 // CHECK-LABEL: @test_vst1q_p64_x4(
13534 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
13535 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
13536 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
13537 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
13538 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
13539 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
13540 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
13541 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13542 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13543 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
13544 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13545 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13546 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13547 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13548 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13549 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13550 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13551 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13552 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13553 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13554 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
13555 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
13556 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
13557 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
13558 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13559 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13560 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13561 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
13562 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
13563 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
13564 // CHECK: ret void
// Codegen test: expected IR (aarch64.neon.st1x4.v2i64) is pinned by the CHECK lines above.
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}
13568
13569 // CHECK-LABEL: @test_vst1_f64_x4(
13570 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
13571 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
13572 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
13573 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
13574 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
13575 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
13576 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13577 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
13578 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13579 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
13580 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13581 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13582 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13583 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
13584 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13585 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13586 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13587 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
13588 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13589 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13590 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13591 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
13592 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
13593 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
13594 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13595 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13596 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13597 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
13598 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
13599 // CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
13600 // CHECK: ret void
// Codegen test: expected IR (aarch64.neon.st1x4.v1f64) is pinned by the CHECK lines above.
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}
13604
13605 // CHECK-LABEL: @test_vst1_p64_x4(
13606 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
13607 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
13608 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
13609 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13610 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
13611 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
13612 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
13613 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
13614 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13615 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13616 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13617 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13618 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13619 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13620 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13621 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13622 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13623 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13624 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13625 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13626 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
13627 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13628 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13629 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13630 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13631 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13632 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13633 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13634 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
13635 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
13636 // CHECK: ret void
// Codegen test: expected IR (aarch64.neon.st1x4.v1i64) is pinned by the CHECK lines above.
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
13640
13641 // CHECK-LABEL: @test_vceqd_s64(
13642 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
13643 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13644 // CHECK: ret i64 [[VCEQD_I]]
// Scalar compare-equal; CHECK lines above pin icmp eq + sext-to-i64 mask codegen.
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}
13648
13649 // CHECK-LABEL: @test_vceqd_u64(
13650 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
13651 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13652 // CHECK: ret i64 [[VCEQD_I]]
// Scalar unsigned compare-equal; CHECK lines above pin icmp eq + sext-to-i64 mask codegen.
// Fix: cast the result to uint64_t to match the declared return type and the
// other unsigned-compare tests in this file (vcged_u64, vcgtd_u64, ...); the
// generated IR is unaffected (same i64 width), so the CHECK lines stay valid.
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vceqd_u64(a, b);
}
13656
13657 // CHECK-LABEL: @test_vceqzd_s64(
13658 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
13659 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
13660 // CHECK: ret i64 [[VCEQZ_I]]
// Scalar compare-equal-to-zero; CHECK lines above pin icmp eq 0 + sext codegen.
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}
13664
13665 // CHECK-LABEL: @test_vceqzd_u64(
13666 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
13667 // CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
13668 // CHECK: ret i64 [[VCEQZD_I]]
test_vceqzd_u64(int64_t a)13669 int64_t test_vceqzd_u64(int64_t a) {
13670 return (int64_t)vceqzd_u64(a);
13671 }
13672
13673 // CHECK-LABEL: @test_vcged_s64(
13674 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
13675 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13676 // CHECK: ret i64 [[VCEQD_I]]
// Scalar signed >=; CHECK lines above pin icmp sge + sext codegen.
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}
13680
13681 // CHECK-LABEL: @test_vcged_u64(
13682 // CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
13683 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13684 // CHECK: ret i64 [[VCEQD_I]]
// Scalar unsigned >=; CHECK lines above pin icmp uge + sext codegen.
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}
13688
13689 // CHECK-LABEL: @test_vcgezd_s64(
13690 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
13691 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13692 // CHECK: ret i64 [[VCGEZ_I]]
// Scalar signed >= 0; CHECK lines above pin icmp sge 0 + sext codegen.
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}
13696
13697 // CHECK-LABEL: @test_vcgtd_s64(
13698 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
13699 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13700 // CHECK: ret i64 [[VCEQD_I]]
// Scalar signed >; CHECK lines above pin icmp sgt + sext codegen.
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}
13704
13705 // CHECK-LABEL: @test_vcgtd_u64(
13706 // CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
13707 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13708 // CHECK: ret i64 [[VCEQD_I]]
// Scalar unsigned >; CHECK lines above pin icmp ugt + sext codegen.
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}
13712
13713 // CHECK-LABEL: @test_vcgtzd_s64(
13714 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
13715 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13716 // CHECK: ret i64 [[VCGTZ_I]]
// Scalar signed > 0; CHECK lines above pin icmp sgt 0 + sext codegen.
int64_t test_vcgtzd_s64(int64_t a) {
  return (int64_t)vcgtzd_s64(a);
}
13720
13721 // CHECK-LABEL: @test_vcled_s64(
13722 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
13723 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13724 // CHECK: ret i64 [[VCEQD_I]]
// Scalar signed <=; CHECK lines above pin icmp sle + sext codegen.
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}
13728
13729 // CHECK-LABEL: @test_vcled_u64(
13730 // CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
13731 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13732 // CHECK: ret i64 [[VCEQD_I]]
// Scalar unsigned <=; CHECK lines above pin icmp ule + sext codegen.
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}
13736
13737 // CHECK-LABEL: @test_vclezd_s64(
13738 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
13739 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13740 // CHECK: ret i64 [[VCLEZ_I]]
// Scalar signed <= 0; CHECK lines above pin icmp sle 0 + sext codegen.
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}
13744
13745 // CHECK-LABEL: @test_vcltd_s64(
13746 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
13747 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13748 // CHECK: ret i64 [[VCEQD_I]]
// Scalar signed <; CHECK lines above pin icmp slt + sext codegen.
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}
13752
13753 // CHECK-LABEL: @test_vcltd_u64(
13754 // CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
13755 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
13756 // CHECK: ret i64 [[VCEQD_I]]
// Scalar unsigned <; CHECK lines above pin icmp ult + sext codegen.
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}
13760
13761 // CHECK-LABEL: @test_vcltzd_s64(
13762 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
13763 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13764 // CHECK: ret i64 [[VCLTZ_I]]
// Scalar signed < 0; CHECK lines above pin icmp slt 0 + sext codegen.
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}
13768
13769 // CHECK-LABEL: @test_vtstd_s64(
13770 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
13771 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
13772 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
13773 // CHECK: ret i64 [[VTSTD_I]]
// Scalar bit test; CHECK lines above pin and + icmp ne 0 + sext codegen.
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}
13777
13778 // CHECK-LABEL: @test_vtstd_u64(
13779 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
13780 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
13781 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
13782 // CHECK: ret i64 [[VTSTD_I]]
// Scalar unsigned bit test; CHECK lines above pin and + icmp ne 0 + sext codegen.
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}
13786
13787 // CHECK-LABEL: @test_vabsd_s64(
13788 // CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
13789 // CHECK: ret i64 [[VABSD_S64_I]]
// Scalar absolute value; CHECK lines above pin the llvm.aarch64.neon.abs.i64 call.
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}
13793
13794 // CHECK-LABEL: @test_vqabsb_s8(
13795 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13796 // CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
13797 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
13798 // CHECK: ret i8 [[TMP1]]
// Scalar saturating abs (i8); CHECK lines above pin the insert/sqabs.v8i8/extract pattern.
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}
13802
13803 // CHECK-LABEL: @test_vqabsh_s16(
13804 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13805 // CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
13806 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
13807 // CHECK: ret i16 [[TMP1]]
// Scalar saturating abs (i16); CHECK lines above pin the insert/sqabs.v4i16/extract pattern.
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}
13811
13812 // CHECK-LABEL: @test_vqabss_s32(
13813 // CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
13814 // CHECK: ret i32 [[VQABSS_S32_I]]
// Scalar saturating abs (i32); CHECK lines above pin the direct sqabs.i32 call.
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}
13818
13819 // CHECK-LABEL: @test_vqabsd_s64(
13820 // CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
13821 // CHECK: ret i64 [[VQABSD_S64_I]]
// Scalar saturating abs (i64); CHECK lines above pin the direct sqabs.i64 call.
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}
13825
13826 // CHECK-LABEL: @test_vnegd_s64(
13827 // CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a
13828 // CHECK: ret i64 [[VNEGD_I]]
// Scalar negate; CHECK lines above pin "sub i64 0, %a" codegen.
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}
13832
13833 // CHECK-LABEL: @test_vqnegb_s8(
13834 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13835 // CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
13836 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
13837 // CHECK: ret i8 [[TMP1]]
// Scalar saturating negate (i8); CHECK lines above pin the insert/sqneg.v8i8/extract pattern.
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}
13841
13842 // CHECK-LABEL: @test_vqnegh_s16(
13843 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13844 // CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
13845 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
13846 // CHECK: ret i16 [[TMP1]]
// Scalar saturating negate (i16); CHECK lines above pin the insert/sqneg.v4i16/extract pattern.
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}
13850
13851 // CHECK-LABEL: @test_vqnegs_s32(
13852 // CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
13853 // CHECK: ret i32 [[VQNEGS_S32_I]]
// Scalar saturating negate (i32); CHECK lines above pin the direct sqneg.i32 call.
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}
13857
13858 // CHECK-LABEL: @test_vqnegd_s64(
13859 // CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
13860 // CHECK: ret i64 [[VQNEGD_S64_I]]
// Scalar saturating negate (i64); CHECK lines above pin the direct sqneg.i64 call.
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}
13864
13865 // CHECK-LABEL: @test_vuqaddb_s8(
13866 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13867 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
13868 // CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13869 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
13870 // CHECK: ret i8 [[TMP2]]
// Signed saturating add of unsigned value (i8); CHECK lines above pin suqadd.v8i8 codegen.
int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}
13874
13875 // CHECK-LABEL: @test_vuqaddh_s16(
13876 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13877 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13878 // CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13879 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
13880 // CHECK: ret i16 [[TMP2]]
// Signed saturating add of unsigned value (i16); CHECK lines above pin suqadd.v4i16 codegen.
int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}
13884
13885 // CHECK-LABEL: @test_vuqadds_s32(
13886 // CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
13887 // CHECK: ret i32 [[VUQADDS_S32_I]]
// Signed saturating add of unsigned value (i32); CHECK lines above pin suqadd.i32 codegen.
int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}
13891
13892 // CHECK-LABEL: @test_vuqaddd_s64(
13893 // CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
13894 // CHECK: ret i64 [[VUQADDD_S64_I]]
// Signed saturating add of unsigned value (i64); CHECK lines above pin suqadd.i64 codegen.
int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}
13898
13899 // CHECK-LABEL: @test_vsqaddb_u8(
13900 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
13901 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
13902 // CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13903 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
13904 // CHECK: ret i8 [[TMP2]]
// Unsigned saturating add of signed value (i8); CHECK lines above pin usqadd.v8i8 codegen.
uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}
13908
13909 // CHECK-LABEL: @test_vsqaddh_u16(
13910 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13911 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13912 // CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13913 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
13914 // CHECK: ret i16 [[TMP2]]
// Unsigned saturating add of signed value (i16); CHECK lines above pin usqadd.v4i16 codegen.
uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}
13918
13919 // CHECK-LABEL: @test_vsqadds_u32(
13920 // CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
13921 // CHECK: ret i32 [[VSQADDS_U32_I]]
// Unsigned saturating add of signed value (i32); CHECK lines above pin usqadd.i32 codegen.
uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}
13925
13926 // CHECK-LABEL: @test_vsqaddd_u64(
13927 // CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
13928 // CHECK: ret i64 [[VSQADDD_U64_I]]
// Unsigned saturating add of signed value (i64); CHECK lines above pin usqadd.i64 codegen.
uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}
13932
13933 // CHECK-LABEL: @test_vqdmlalh_s16(
13934 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13935 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
13936 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13937 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13938 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
13939 // CHECK: ret i32 [[VQDMLXL1_I]]
// Scalar saturating doubling multiply-accumulate; CHECK lines above pin sqdmull + sqadd codegen.
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlalh_s16(a, b, c);
}
13943
13944 // CHECK-LABEL: @test_vqdmlals_s32(
13945 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13946 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
13947 // CHECK: ret i64 [[VQDMLXL1_I]]
// Scalar saturating doubling multiply-accumulate; CHECK lines above pin sqdmulls.scalar + sqadd.i64.
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlals_s32(a, b, c);
}
13951
13952 // CHECK-LABEL: @test_vqdmlslh_s16(
13953 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13954 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
13955 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13956 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13957 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
13958 // CHECK: ret i32 [[VQDMLXL1_I]]
// Scalar saturating doubling multiply-subtract; CHECK lines above pin sqdmull + sqsub codegen.
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlslh_s16(a, b, c);
}
13962
13963 // CHECK-LABEL: @test_vqdmlsls_s32(
13964 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13965 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
13966 // CHECK: ret i64 [[VQDMLXL1_I]]
// Scalar saturating doubling multiply-subtract; CHECK lines above pin sqdmulls.scalar + sqsub.i64.
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlsls_s32(a, b, c);
}
13970
13971 // CHECK-LABEL: @test_vqdmullh_s16(
13972 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
13973 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
13974 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13975 // CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
13976 // CHECK: ret i32 [[TMP2]]
// Scalar saturating doubling multiply-long; CHECK lines above pin sqdmull.v4i32 + extract.
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
  return (int32_t)vqdmullh_s16(a, b);
}
13980
13981 // CHECK-LABEL: @test_vqdmulls_s32(
13982 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
13983 // CHECK: ret i64 [[VQDMULLS_S32_I]]
// Scalar saturating doubling multiply-long; CHECK lines above pin the sqdmulls.scalar call.
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
  return (int64_t)vqdmulls_s32(a, b);
}
13987
13988 // CHECK-LABEL: @test_vqmovunh_s16(
13989 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
13990 // CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
13991 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
13992 // CHECK: ret i8 [[TMP1]]
// Scalar saturating narrow signed->unsigned; CHECK lines above pin sqxtun.v8i8 codegen.
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}
13996
13997 // CHECK-LABEL: @test_vqmovuns_s32(
13998 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
13999 // CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
14000 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
14001 // CHECK: ret i16 [[TMP1]]
// Scalar saturating narrow signed->unsigned; CHECK lines above pin sqxtun.v4i16 codegen.
int16_t test_vqmovuns_s32(int32_t a) {
  return (int16_t)vqmovuns_s32(a);
}
14005
14006 // CHECK-LABEL: @test_vqmovund_s64(
14007 // CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
14008 // CHECK: ret i32 [[VQMOVUND_S64_I]]
// Scalar saturating narrow signed->unsigned (i64->i32); CHECK lines above pin scalar.sqxtun.
int32_t test_vqmovund_s64(int64_t a) {
  return (int32_t)vqmovund_s64(a);
}
14012
14013 // CHECK-LABEL: @test_vqmovnh_s16(
14014 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14015 // CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
14016 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
14017 // CHECK: ret i8 [[TMP1]]
test_vqmovnh_s16(int16_t a)14018 int8_t test_vqmovnh_s16(int16_t a) {
14019 return (int8_t)vqmovnh_s16(a);
14020 }
14021
14022 // CHECK-LABEL: @test_vqmovns_s32(
14023 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14024 // CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
14025 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
14026 // CHECK: ret i16 [[TMP1]]
test_vqmovns_s32(int32_t a)14027 int16_t test_vqmovns_s32(int32_t a) {
14028 return (int16_t)vqmovns_s32(a);
14029 }
14030
14031 // CHECK-LABEL: @test_vqmovnd_s64(
14032 // CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
14033 // CHECK: ret i32 [[VQMOVND_S64_I]]
test_vqmovnd_s64(int64_t a)14034 int32_t test_vqmovnd_s64(int64_t a) {
14035 return (int32_t)vqmovnd_s64(a);
14036 }
14037
14038 // CHECK-LABEL: @test_vqmovnh_u16(
14039 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
14040 // CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
14041 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
14042 // CHECK: ret i8 [[TMP1]]
test_vqmovnh_u16(int16_t a)14043 int8_t test_vqmovnh_u16(int16_t a) {
14044 return (int8_t)vqmovnh_u16(a);
14045 }
14046
14047 // CHECK-LABEL: @test_vqmovns_u32(
14048 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
14049 // CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
14050 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
14051 // CHECK: ret i16 [[TMP1]]
test_vqmovns_u32(int32_t a)14052 int16_t test_vqmovns_u32(int32_t a) {
14053 return (int16_t)vqmovns_u32(a);
14054 }
14055
14056 // CHECK-LABEL: @test_vqmovnd_u64(
14057 // CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
14058 // CHECK: ret i32 [[VQMOVND_U64_I]]
test_vqmovnd_u64(int64_t a)14059 int32_t test_vqmovnd_u64(int64_t a) {
14060 return (int32_t)vqmovnd_u64(a);
14061 }
14062
14063 // CHECK-LABEL: @test_vceqs_f32(
14064 // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
14065 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14066 // CHECK: ret i32 [[VCMPD_I]]
test_vceqs_f32(float32_t a,float32_t b)14067 uint32_t test_vceqs_f32(float32_t a, float32_t b) {
14068 return (uint32_t)vceqs_f32(a, b);
14069 }
14070
14071 // CHECK-LABEL: @test_vceqd_f64(
14072 // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
14073 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14074 // CHECK: ret i64 [[VCMPD_I]]
test_vceqd_f64(float64_t a,float64_t b)14075 uint64_t test_vceqd_f64(float64_t a, float64_t b) {
14076 return (uint64_t)vceqd_f64(a, b);
14077 }
14078
14079 // CHECK-LABEL: @test_vceqzs_f32(
14080 // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
14081 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
14082 // CHECK: ret i32 [[VCEQZ_I]]
test_vceqzs_f32(float32_t a)14083 uint32_t test_vceqzs_f32(float32_t a) {
14084 return (uint32_t)vceqzs_f32(a);
14085 }
14086
14087 // CHECK-LABEL: @test_vceqzd_f64(
14088 // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
14089 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
14090 // CHECK: ret i64 [[VCEQZ_I]]
test_vceqzd_f64(float64_t a)14091 uint64_t test_vceqzd_f64(float64_t a) {
14092 return (uint64_t)vceqzd_f64(a);
14093 }
14094
14095 // CHECK-LABEL: @test_vcges_f32(
14096 // CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
14097 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14098 // CHECK: ret i32 [[VCMPD_I]]
test_vcges_f32(float32_t a,float32_t b)14099 uint32_t test_vcges_f32(float32_t a, float32_t b) {
14100 return (uint32_t)vcges_f32(a, b);
14101 }
14102
14103 // CHECK-LABEL: @test_vcged_f64(
14104 // CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
14105 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14106 // CHECK: ret i64 [[VCMPD_I]]
test_vcged_f64(float64_t a,float64_t b)14107 uint64_t test_vcged_f64(float64_t a, float64_t b) {
14108 return (uint64_t)vcged_f64(a, b);
14109 }
14110
14111 // CHECK-LABEL: @test_vcgezs_f32(
14112 // CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
14113 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
14114 // CHECK: ret i32 [[VCGEZ_I]]
test_vcgezs_f32(float32_t a)14115 uint32_t test_vcgezs_f32(float32_t a) {
14116 return (uint32_t)vcgezs_f32(a);
14117 }
14118
14119 // CHECK-LABEL: @test_vcgezd_f64(
14120 // CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
14121 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
14122 // CHECK: ret i64 [[VCGEZ_I]]
test_vcgezd_f64(float64_t a)14123 uint64_t test_vcgezd_f64(float64_t a) {
14124 return (uint64_t)vcgezd_f64(a);
14125 }
14126
14127 // CHECK-LABEL: @test_vcgts_f32(
14128 // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
14129 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14130 // CHECK: ret i32 [[VCMPD_I]]
test_vcgts_f32(float32_t a,float32_t b)14131 uint32_t test_vcgts_f32(float32_t a, float32_t b) {
14132 return (uint32_t)vcgts_f32(a, b);
14133 }
14134
14135 // CHECK-LABEL: @test_vcgtd_f64(
14136 // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
14137 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14138 // CHECK: ret i64 [[VCMPD_I]]
test_vcgtd_f64(float64_t a,float64_t b)14139 uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
14140 return (uint64_t)vcgtd_f64(a, b);
14141 }
14142
14143 // CHECK-LABEL: @test_vcgtzs_f32(
14144 // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
14145 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
14146 // CHECK: ret i32 [[VCGTZ_I]]
test_vcgtzs_f32(float32_t a)14147 uint32_t test_vcgtzs_f32(float32_t a) {
14148 return (uint32_t)vcgtzs_f32(a);
14149 }
14150
14151 // CHECK-LABEL: @test_vcgtzd_f64(
14152 // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
14153 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
14154 // CHECK: ret i64 [[VCGTZ_I]]
test_vcgtzd_f64(float64_t a)14155 uint64_t test_vcgtzd_f64(float64_t a) {
14156 return (uint64_t)vcgtzd_f64(a);
14157 }
14158
14159 // CHECK-LABEL: @test_vcles_f32(
14160 // CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
14161 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14162 // CHECK: ret i32 [[VCMPD_I]]
test_vcles_f32(float32_t a,float32_t b)14163 uint32_t test_vcles_f32(float32_t a, float32_t b) {
14164 return (uint32_t)vcles_f32(a, b);
14165 }
14166
14167 // CHECK-LABEL: @test_vcled_f64(
14168 // CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
14169 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14170 // CHECK: ret i64 [[VCMPD_I]]
test_vcled_f64(float64_t a,float64_t b)14171 uint64_t test_vcled_f64(float64_t a, float64_t b) {
14172 return (uint64_t)vcled_f64(a, b);
14173 }
14174
14175 // CHECK-LABEL: @test_vclezs_f32(
14176 // CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
14177 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
14178 // CHECK: ret i32 [[VCLEZ_I]]
test_vclezs_f32(float32_t a)14179 uint32_t test_vclezs_f32(float32_t a) {
14180 return (uint32_t)vclezs_f32(a);
14181 }
14182
14183 // CHECK-LABEL: @test_vclezd_f64(
14184 // CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
14185 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
14186 // CHECK: ret i64 [[VCLEZ_I]]
test_vclezd_f64(float64_t a)14187 uint64_t test_vclezd_f64(float64_t a) {
14188 return (uint64_t)vclezd_f64(a);
14189 }
14190
14191 // CHECK-LABEL: @test_vclts_f32(
14192 // CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
14193 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
14194 // CHECK: ret i32 [[VCMPD_I]]
test_vclts_f32(float32_t a,float32_t b)14195 uint32_t test_vclts_f32(float32_t a, float32_t b) {
14196 return (uint32_t)vclts_f32(a, b);
14197 }
14198
14199 // CHECK-LABEL: @test_vcltd_f64(
14200 // CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
14201 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
14202 // CHECK: ret i64 [[VCMPD_I]]
test_vcltd_f64(float64_t a,float64_t b)14203 uint64_t test_vcltd_f64(float64_t a, float64_t b) {
14204 return (uint64_t)vcltd_f64(a, b);
14205 }
14206
14207 // CHECK-LABEL: @test_vcltzs_f32(
14208 // CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
14209 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
14210 // CHECK: ret i32 [[VCLTZ_I]]
test_vcltzs_f32(float32_t a)14211 uint32_t test_vcltzs_f32(float32_t a) {
14212 return (uint32_t)vcltzs_f32(a);
14213 }
14214
14215 // CHECK-LABEL: @test_vcltzd_f64(
14216 // CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
14217 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
14218 // CHECK: ret i64 [[VCLTZ_I]]
test_vcltzd_f64(float64_t a)14219 uint64_t test_vcltzd_f64(float64_t a) {
14220 return (uint64_t)vcltzd_f64(a);
14221 }
14222
14223 // CHECK-LABEL: @test_vcages_f32(
14224 // CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
14225 // CHECK: ret i32 [[VCAGES_F32_I]]
test_vcages_f32(float32_t a,float32_t b)14226 uint32_t test_vcages_f32(float32_t a, float32_t b) {
14227 return (uint32_t)vcages_f32(a, b);
14228 }
14229
14230 // CHECK-LABEL: @test_vcaged_f64(
14231 // CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
14232 // CHECK: ret i64 [[VCAGED_F64_I]]
test_vcaged_f64(float64_t a,float64_t b)14233 uint64_t test_vcaged_f64(float64_t a, float64_t b) {
14234 return (uint64_t)vcaged_f64(a, b);
14235 }
14236
14237 // CHECK-LABEL: @test_vcagts_f32(
14238 // CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
14239 // CHECK: ret i32 [[VCAGTS_F32_I]]
test_vcagts_f32(float32_t a,float32_t b)14240 uint32_t test_vcagts_f32(float32_t a, float32_t b) {
14241 return (uint32_t)vcagts_f32(a, b);
14242 }
14243
14244 // CHECK-LABEL: @test_vcagtd_f64(
14245 // CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
14246 // CHECK: ret i64 [[VCAGTD_F64_I]]
test_vcagtd_f64(float64_t a,float64_t b)14247 uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
14248 return (uint64_t)vcagtd_f64(a, b);
14249 }
14250
14251 // CHECK-LABEL: @test_vcales_f32(
14252 // CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
14253 // CHECK: ret i32 [[VCALES_F32_I]]
test_vcales_f32(float32_t a,float32_t b)14254 uint32_t test_vcales_f32(float32_t a, float32_t b) {
14255 return (uint32_t)vcales_f32(a, b);
14256 }
14257
14258 // CHECK-LABEL: @test_vcaled_f64(
14259 // CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
14260 // CHECK: ret i64 [[VCALED_F64_I]]
test_vcaled_f64(float64_t a,float64_t b)14261 uint64_t test_vcaled_f64(float64_t a, float64_t b) {
14262 return (uint64_t)vcaled_f64(a, b);
14263 }
14264
14265 // CHECK-LABEL: @test_vcalts_f32(
14266 // CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
14267 // CHECK: ret i32 [[VCALTS_F32_I]]
test_vcalts_f32(float32_t a,float32_t b)14268 uint32_t test_vcalts_f32(float32_t a, float32_t b) {
14269 return (uint32_t)vcalts_f32(a, b);
14270 }
14271
14272 // CHECK-LABEL: @test_vcaltd_f64(
14273 // CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
14274 // CHECK: ret i64 [[VCALTD_F64_I]]
test_vcaltd_f64(float64_t a,float64_t b)14275 uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
14276 return (uint64_t)vcaltd_f64(a, b);
14277 }
14278
14279 // CHECK-LABEL: @test_vshrd_n_s64(
14280 // CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
14281 // CHECK: ret i64 [[SHRD_N]]
test_vshrd_n_s64(int64_t a)14282 int64_t test_vshrd_n_s64(int64_t a) {
14283 return (int64_t)vshrd_n_s64(a, 1);
14284 }
14285
14286 // CHECK-LABEL: @test_vshr_n_s64(
14287 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14288 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14289 // CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
14290 // CHECK: ret <1 x i64> [[VSHR_N]]
test_vshr_n_s64(int64x1_t a)14291 int64x1_t test_vshr_n_s64(int64x1_t a) {
14292 return vshr_n_s64(a, 1);
14293 }
14294
14295 // CHECK-LABEL: @test_vshrd_n_u64(
14296 // CHECK: ret i64 0
test_vshrd_n_u64(uint64_t a)14297 uint64_t test_vshrd_n_u64(uint64_t a) {
14298 return (uint64_t)vshrd_n_u64(a, 64);
14299 }
14300
14301 // CHECK-LABEL: @test_vshrd_n_u64_2(
14302 // CHECK: ret i64 0
test_vshrd_n_u64_2()14303 uint64_t test_vshrd_n_u64_2() {
14304 uint64_t a = UINT64_C(0xf000000000000000);
14305 return vshrd_n_u64(a, 64);
14306 }
14307
14308 // CHECK-LABEL: @test_vshr_n_u64(
14309 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14310 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14311 // CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
14312 // CHECK: ret <1 x i64> [[VSHR_N]]
test_vshr_n_u64(uint64x1_t a)14313 uint64x1_t test_vshr_n_u64(uint64x1_t a) {
14314 return vshr_n_u64(a, 1);
14315 }
14316
14317 // CHECK-LABEL: @test_vrshrd_n_s64(
14318 // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
14319 // CHECK: ret i64 [[VRSHR_N]]
test_vrshrd_n_s64(int64_t a)14320 int64_t test_vrshrd_n_s64(int64_t a) {
14321 return (int64_t)vrshrd_n_s64(a, 63);
14322 }
14323
14324 // CHECK-LABEL: @test_vrshr_n_s64(
14325 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14326 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14327 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14328 // CHECK: ret <1 x i64> [[VRSHR_N1]]
test_vrshr_n_s64(int64x1_t a)14329 int64x1_t test_vrshr_n_s64(int64x1_t a) {
14330 return vrshr_n_s64(a, 1);
14331 }
14332
14333 // CHECK-LABEL: @test_vrshrd_n_u64(
14334 // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
14335 // CHECK: ret i64 [[VRSHR_N]]
test_vrshrd_n_u64(uint64_t a)14336 uint64_t test_vrshrd_n_u64(uint64_t a) {
14337 return (uint64_t)vrshrd_n_u64(a, 63);
14338 }
14339
14340 // CHECK-LABEL: @test_vrshr_n_u64(
14341 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14342 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14343 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14344 // CHECK: ret <1 x i64> [[VRSHR_N1]]
test_vrshr_n_u64(uint64x1_t a)14345 uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
14346 return vrshr_n_u64(a, 1);
14347 }
14348
14349 // CHECK-LABEL: @test_vsrad_n_s64(
14350 // CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
14351 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
14352 // CHECK: ret i64 [[TMP0]]
test_vsrad_n_s64(int64_t a,int64_t b)14353 int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
14354 return (int64_t)vsrad_n_s64(a, b, 63);
14355 }
14356
14357 // CHECK-LABEL: @test_vsra_n_s64(
14358 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14359 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14360 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14361 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14362 // CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
14363 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
14364 // CHECK: ret <1 x i64> [[TMP4]]
test_vsra_n_s64(int64x1_t a,int64x1_t b)14365 int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
14366 return vsra_n_s64(a, b, 1);
14367 }
14368
14369 // CHECK-LABEL: @test_vsrad_n_u64(
14370 // CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
14371 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
14372 // CHECK: ret i64 [[TMP0]]
test_vsrad_n_u64(uint64_t a,uint64_t b)14373 uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
14374 return (uint64_t)vsrad_n_u64(a, b, 63);
14375 }
14376
14377 // CHECK-LABEL: @test_vsrad_n_u64_2(
14378 // CHECK: ret i64 %a
test_vsrad_n_u64_2(uint64_t a,uint64_t b)14379 uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
14380 return (uint64_t)vsrad_n_u64(a, b, 64);
14381 }
14382
14383 // CHECK-LABEL: @test_vsra_n_u64(
14384 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14385 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14386 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14387 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14388 // CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
14389 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
14390 // CHECK: ret <1 x i64> [[TMP4]]
test_vsra_n_u64(uint64x1_t a,uint64x1_t b)14391 uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
14392 return vsra_n_u64(a, b, 1);
14393 }
14394
14395 // CHECK-LABEL: @test_vrsrad_n_s64(
14396 // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
14397 // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
14398 // CHECK: ret i64 [[TMP1]]
test_vrsrad_n_s64(int64_t a,int64_t b)14399 int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
14400 return (int64_t)vrsrad_n_s64(a, b, 63);
14401 }
14402
14403 // CHECK-LABEL: @test_vrsra_n_s64(
14404 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14405 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14406 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14407 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14408 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14409 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
14410 // CHECK: ret <1 x i64> [[TMP3]]
test_vrsra_n_s64(int64x1_t a,int64x1_t b)14411 int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
14412 return vrsra_n_s64(a, b, 1);
14413 }
14414
14415 // CHECK-LABEL: @test_vrsrad_n_u64(
14416 // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
14417 // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
14418 // CHECK: ret i64 [[TMP1]]
test_vrsrad_n_u64(uint64_t a,uint64_t b)14419 uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
14420 return (uint64_t)vrsrad_n_u64(a, b, 63);
14421 }
14422
14423 // CHECK-LABEL: @test_vrsra_n_u64(
14424 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14425 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14426 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14427 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
14428 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14429 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
14430 // CHECK: ret <1 x i64> [[TMP3]]
test_vrsra_n_u64(uint64x1_t a,uint64x1_t b)14431 uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
14432 return vrsra_n_u64(a, b, 1);
14433 }
14434
14435 // CHECK-LABEL: @test_vshld_n_s64(
14436 // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
14437 // CHECK: ret i64 [[SHLD_N]]
test_vshld_n_s64(int64_t a)14438 int64_t test_vshld_n_s64(int64_t a) {
14439 return (int64_t)vshld_n_s64(a, 1);
14440 }
14441
14442 // CHECK-LABEL: @test_vshl_n_s64(
14443 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14444 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14445 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
14446 // CHECK: ret <1 x i64> [[VSHL_N]]
test_vshl_n_s64(int64x1_t a)14447 int64x1_t test_vshl_n_s64(int64x1_t a) {
14448 return vshl_n_s64(a, 1);
14449 }
14450
14451 // CHECK-LABEL: @test_vshld_n_u64(
14452 // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
14453 // CHECK: ret i64 [[SHLD_N]]
test_vshld_n_u64(uint64_t a)14454 uint64_t test_vshld_n_u64(uint64_t a) {
14455 return (uint64_t)vshld_n_u64(a, 63);
14456 }
14457
14458 // CHECK-LABEL: @test_vshl_n_u64(
14459 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14460 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14461 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
14462 // CHECK: ret <1 x i64> [[VSHL_N]]
test_vshl_n_u64(uint64x1_t a)14463 uint64x1_t test_vshl_n_u64(uint64x1_t a) {
14464 return vshl_n_u64(a, 1);
14465 }
14466
14467 // CHECK-LABEL: @test_vqshlb_n_s8(
14468 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
14469 // CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
14470 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
14471 // CHECK: ret i8 [[TMP1]]
test_vqshlb_n_s8(int8_t a)14472 int8_t test_vqshlb_n_s8(int8_t a) {
14473 return (int8_t)vqshlb_n_s8(a, 7);
14474 }
14475
14476 // CHECK-LABEL: @test_vqshlh_n_s16(
14477 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
14478 // CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
14479 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
14480 // CHECK: ret i16 [[TMP1]]
test_vqshlh_n_s16(int16_t a)14481 int16_t test_vqshlh_n_s16(int16_t a) {
14482 return (int16_t)vqshlh_n_s16(a, 15);
14483 }
14484
14485 // CHECK-LABEL: @test_vqshls_n_s32(
14486 // CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
14487 // CHECK: ret i32 [[VQSHLS_N_S32]]
test_vqshls_n_s32(int32_t a)14488 int32_t test_vqshls_n_s32(int32_t a) {
14489 return (int32_t)vqshls_n_s32(a, 31);
14490 }
14491
14492 // CHECK-LABEL: @test_vqshld_n_s64(
14493 // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
14494 // CHECK: ret i64 [[VQSHL_N]]
test_vqshld_n_s64(int64_t a)14495 int64_t test_vqshld_n_s64(int64_t a) {
14496 return (int64_t)vqshld_n_s64(a, 63);
14497 }
14498
14499 // CHECK-LABEL: @test_vqshl_n_s8(
14500 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
14501 // CHECK: ret <8 x i8> [[VQSHL_N]]
test_vqshl_n_s8(int8x8_t a)14502 int8x8_t test_vqshl_n_s8(int8x8_t a) {
14503 return vqshl_n_s8(a, 0);
14504 }
14505
14506 // CHECK-LABEL: @test_vqshlq_n_s8(
14507 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
14508 // CHECK: ret <16 x i8> [[VQSHL_N]]
test_vqshlq_n_s8(int8x16_t a)14509 int8x16_t test_vqshlq_n_s8(int8x16_t a) {
14510 return vqshlq_n_s8(a, 0);
14511 }
14512
14513 // CHECK-LABEL: @test_vqshl_n_s16(
14514 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14515 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14516 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
14517 // CHECK: ret <4 x i16> [[VQSHL_N1]]
test_vqshl_n_s16(int16x4_t a)14518 int16x4_t test_vqshl_n_s16(int16x4_t a) {
14519 return vqshl_n_s16(a, 0);
14520 }
14521
14522 // CHECK-LABEL: @test_vqshlq_n_s16(
14523 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14524 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14525 // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
14526 // CHECK: ret <8 x i16> [[VQSHL_N1]]
test_vqshlq_n_s16(int16x8_t a)14527 int16x8_t test_vqshlq_n_s16(int16x8_t a) {
14528 return vqshlq_n_s16(a, 0);
14529 }
14530
14531 // CHECK-LABEL: @test_vqshl_n_s32(
14532 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14533 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14534 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
14535 // CHECK: ret <2 x i32> [[VQSHL_N1]]
test_vqshl_n_s32(int32x2_t a)14536 int32x2_t test_vqshl_n_s32(int32x2_t a) {
14537 return vqshl_n_s32(a, 0);
14538 }
14539
14540 // CHECK-LABEL: @test_vqshlq_n_s32(
14541 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14542 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14543 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
14544 // CHECK: ret <4 x i32> [[VQSHL_N1]]
test_vqshlq_n_s32(int32x4_t a)14545 int32x4_t test_vqshlq_n_s32(int32x4_t a) {
14546 return vqshlq_n_s32(a, 0);
14547 }
14548
14549 // CHECK-LABEL: @test_vqshlq_n_s64(
14550 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14551 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14552 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
14553 // CHECK: ret <2 x i64> [[VQSHL_N1]]
test_vqshlq_n_s64(int64x2_t a)14554 int64x2_t test_vqshlq_n_s64(int64x2_t a) {
14555 return vqshlq_n_s64(a, 0);
14556 }
14557
14558 // CHECK-LABEL: @test_vqshl_n_u8(
14559 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
14560 // CHECK: ret <8 x i8> [[VQSHL_N]]
test_vqshl_n_u8(uint8x8_t a)14561 uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
14562 return vqshl_n_u8(a, 0);
14563 }
14564
14565 // CHECK-LABEL: @test_vqshlq_n_u8(
14566 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
14567 // CHECK: ret <16 x i8> [[VQSHL_N]]
test_vqshlq_n_u8(uint8x16_t a)14568 uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
14569 return vqshlq_n_u8(a, 0);
14570 }
14571
14572 // CHECK-LABEL: @test_vqshl_n_u16(
14573 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14574 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14575 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
14576 // CHECK: ret <4 x i16> [[VQSHL_N1]]
test_vqshl_n_u16(uint16x4_t a)14577 uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
14578 return vqshl_n_u16(a, 0);
14579 }
14580
14581 // CHECK-LABEL: @test_vqshlq_n_u16(
14582 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14583 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14584 // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
14585 // CHECK: ret <8 x i16> [[VQSHL_N1]]
test_vqshlq_n_u16(uint16x8_t a)14586 uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
14587 return vqshlq_n_u16(a, 0);
14588 }
14589
14590 // CHECK-LABEL: @test_vqshl_n_u32(
14591 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14592 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14593 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
14594 // CHECK: ret <2 x i32> [[VQSHL_N1]]
test_vqshl_n_u32(uint32x2_t a)14595 uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
14596 return vqshl_n_u32(a, 0);
14597 }
14598
14599 // CHECK-LABEL: @test_vqshlq_n_u32(
14600 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14601 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14602 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
14603 // CHECK: ret <4 x i32> [[VQSHL_N1]]
test_vqshlq_n_u32(uint32x4_t a)14604 uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
14605 return vqshlq_n_u32(a, 0);
14606 }
14607
14608 // CHECK-LABEL: @test_vqshlq_n_u64(
14609 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14610 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14611 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
14612 // CHECK: ret <2 x i64> [[VQSHL_N1]]
test_vqshlq_n_u64(uint64x2_t a)14613 uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
14614 return vqshlq_n_u64(a, 0);
14615 }
14616
14617 // CHECK-LABEL: @test_vqshl_n_s64(
14618 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14619 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14620 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
14621 // CHECK: ret <1 x i64> [[VQSHL_N1]]
test_vqshl_n_s64(int64x1_t a)14622 int64x1_t test_vqshl_n_s64(int64x1_t a) {
14623 return vqshl_n_s64(a, 1);
14624 }
14625
14626 // CHECK-LABEL: @test_vqshlb_n_u8(
14627 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
14628 // CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
14629 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
14630 // CHECK: ret i8 [[TMP1]]
test_vqshlb_n_u8(uint8_t a)14631 uint8_t test_vqshlb_n_u8(uint8_t a) {
14632 return (uint8_t)vqshlb_n_u8(a, 7);
14633 }
14634
14635 // CHECK-LABEL: @test_vqshlh_n_u16(
14636 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
14637 // CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
14638 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
14639 // CHECK: ret i16 [[TMP1]]
test_vqshlh_n_u16(uint16_t a)14640 uint16_t test_vqshlh_n_u16(uint16_t a) {
14641 return (uint16_t)vqshlh_n_u16(a, 15);
14642 }
14643
14644 // CHECK-LABEL: @test_vqshls_n_u32(
14645 // CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
14646 // CHECK: ret i32 [[VQSHLS_N_U32]]
test_vqshls_n_u32(uint32_t a)14647 uint32_t test_vqshls_n_u32(uint32_t a) {
14648 return (uint32_t)vqshls_n_u32(a, 31);
14649 }
14650
14651 // CHECK-LABEL: @test_vqshld_n_u64(
14652 // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
14653 // CHECK: ret i64 [[VQSHL_N]]
test_vqshld_n_u64(uint64_t a)14654 uint64_t test_vqshld_n_u64(uint64_t a) {
14655 return (uint64_t)vqshld_n_u64(a, 63);
14656 }
14657
14658 // CHECK-LABEL: @test_vqshl_n_u64(
14659 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14660 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14661 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
14662 // CHECK: ret <1 x i64> [[VQSHL_N1]]
test_vqshl_n_u64(uint64x1_t a)14663 uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
14664 return vqshl_n_u64(a, 1);
14665 }
14666
// Saturating shift-left-unsigned by immediate: the scalar 8/16-bit forms are
// emitted via insertelement/extractelement around the vector sqshlu intrinsic,
// while the 32/64-bit scalar and <1 x i64> forms call sqshlu directly.
// CHECK-LABEL: @test_vqshlub_n_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshluh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshlus_n_s32(
// CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshlud_n_s64(
// CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}
14707
// Shift-right-insert (vsri) and shift-left-insert (vsli) by immediate: both
// the scalar i64 and <1 x i64> variants lower to the vsri/vsli.v1i64
// intrinsics, with the scalar forms bitcast through <1 x i64>.
// CHECK-LABEL: @test_vsrid_n_s64(
// CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK: ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrid_n_u64(
// CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK: ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_s64(
// CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK: ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_u64(
// CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK: ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}
14791
// Scalar saturating shift-right-narrow (vqshrn) by immediate: the h/s forms
// go through insertelement/extractelement around the vector sqshrn/uqshrn
// intrinsics; the d form calls the scalar i32-returning intrinsic directly.
// CHECK-LABEL: @test_vqshrnh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_s64(
// CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqshrnh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_u32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_u64(
// CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}
14841
// Scalar saturating rounding shift-right-narrow (vqrshrn) by immediate:
// same lowering shape as vqshrn but via the sqrshrn/uqrshrn intrinsics.
// CHECK-LABEL: @test_vqrshrnh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_s64(
// CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqrshrnh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_u32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_u64(
// CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}
14891
// Scalar saturating shift-right-narrow-unsigned (vqshrun) by immediate,
// lowering to the sqshrun intrinsics.
// CHECK-LABEL: @test_vqshrunh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshruns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrund_n_s64(
// CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}
14916
// Scalar saturating rounding shift-right-narrow-unsigned (vqrshrun) by
// immediate, lowering to the sqrshrun intrinsics.
// CHECK-LABEL: @test_vqrshrunh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshruns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrund_n_s64(
// CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRUND_N_S64]]
int32_t test_vqrshrund_n_s64(int64_t a) {
  return (int32_t)vqrshrund_n_s64(a, 32);
}
14941
// Scalar fixed-point to floating-point conversion with immediate fractional
// bits, lowering to vcvtfxs2fp (signed) / vcvtfxu2fp (unsigned).
// CHECK-LABEL: @test_vcvts_n_f32_s32(
// CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK: ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_f64_s64(
// CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK: ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_f32_u32(
// CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK: ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_f64_u64(
// CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK: ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}
14969
// Scalar floating-point to fixed-point conversion with immediate fractional
// bits, lowering to vcvtfp2fxs (signed) / vcvtfp2fxu (unsigned).
// CHECK-LABEL: @test_vcvts_n_s32_f32(
// CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK: ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_s64_f64(
// CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK: ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_u32_f32(
// CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK: ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_u64_f64(
// CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK: ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
14997
// vreinterpret to int8x8_t from every other 64-bit vector type: a plain
// bitcast, except when source and destination share element type/width
// (u8, p8), in which case the argument is returned unchanged.
// CHECK-LABEL: @test_vreinterpret_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}
15086
// vreinterpret to int16x4_t from every other 64-bit vector type: a plain
// bitcast, except for same-layout sources (u16, p16), which pass through.
// CHECK-LABEL: @test_vreinterpret_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}
15175
// vreinterpret to int32x2_t from every other 64-bit vector type: a plain
// bitcast, except for the same-layout u32 source, which passes through.
// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}
15265
// vreinterpret to int64x1_t from every other 64-bit vector type: a plain
// bitcast, except for same-layout sources (u64, p64), which pass through.
// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u64(
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p64(
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}
15354
// vreinterpret to uint8x8_t from other 64-bit vector types: a plain bitcast,
// except for the same-layout s8 source, which passes through unchanged.
// CHECK-LABEL: @test_vreinterpret_u8_s8(
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}
15423
15424 // CHECK-LABEL: @test_vreinterpret_u8_p8(
15425 // CHECK: ret <8 x i8> %a
test_vreinterpret_u8_p8(poly8x8_t a)15426 uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
15427 return vreinterpret_u8_p8(a);
15428 }
15429
15430 // CHECK-LABEL: @test_vreinterpret_u8_p16(
15431 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15432 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_p16(poly16x4_t a)15433 uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
15434 return vreinterpret_u8_p16(a);
15435 }
15436
15437 // CHECK-LABEL: @test_vreinterpret_u8_p64(
15438 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15439 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_p64(poly64x1_t a)15440 uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
15441 return vreinterpret_u8_p64(a);
15442 }
15443
// vreinterpret_u16_* group: reinterpret every 64-bit NEON vector type as
// uint16x4_t. Each test expects a single bitcast to <4 x i16>, except the
// same-layout sources (s16, p16 -> both <4 x i16>) which are pass-through.
// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}
15532
// vreinterpret_u32_* group: reinterpret every 64-bit NEON vector type as
// uint32x2_t. Each test expects a single bitcast to <2 x i32>, except the
// same-layout source (s32 -> <2 x i32>) which is pass-through.
// CHECK-LABEL: @test_vreinterpret_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s32(
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}
15622
// vreinterpret_u64_* group: reinterpret every 64-bit NEON vector type as
// uint64x1_t. Each test expects a single bitcast to <1 x i64>, except the
// same-layout sources (s64, p64 -> both <1 x i64>) which are pass-through.
// CHECK-LABEL: @test_vreinterpret_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s64(
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p64(
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}
15711
// vreinterpret_f16_* group: reinterpret every 64-bit NEON vector type as
// float16x4_t. Every source type has a different IR element type, so each
// test expects a single bitcast to <4 x half> (no pass-through case here).
// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}
15802
// vreinterpret_f32_* group: reinterpret every 64-bit NEON vector type as
// float32x2_t. Every source type has a different IR element type, so each
// test expects a single bitcast to <2 x float> (no pass-through case here).
// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}
15893
// vreinterpret_f64_* group: reinterpret every 64-bit NEON vector type as
// float64x1_t. Every source type has a different IR element type, so each
// test expects a single bitcast to <1 x double> (no pass-through case here).
// CHECK-LABEL: @test_vreinterpret_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}
15984
// vreinterpret_p8_* group: reinterpret every 64-bit NEON vector type as
// poly8x8_t. Each test expects a single bitcast to <8 x i8>, except the
// same-layout sources (s8, u8 -> both <8 x i8>) which are pass-through.
// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}
16073
// vreinterpret_p16_* group: reinterpret every 64-bit NEON vector type as
// poly16x4_t. Each test expects a single bitcast to <4 x i16>, except the
// same-layout sources (s16, u16 -> both <4 x i16>) which are pass-through.
// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}
16162
// vreinterpret_p64_*: reinterpret each 64-bit NEON vector type as
// poly64x1_t (IR type <1 x i64>). Expected IR: one bitcast, or a bare
// "ret <1 x i64> %a" when the source type already lowers to <1 x i64>
// (s64/u64).
// CHECK-LABEL: @test_vreinterpret_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s64(
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u64(
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}
16251
// vreinterpretq_s8_*: reinterpret each 128-bit NEON vector type as
// int8x16_t (IR type <16 x i8>). Expected IR: one bitcast, or a bare
// "ret <16 x i8> %a" when the source already lowers to <16 x i8> (u8/p8).
// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}
16340
// vreinterpretq_s16_*: reinterpret each 128-bit NEON vector type as
// int16x8_t (IR type <8 x i16>). Expected IR: one bitcast, or a bare
// "ret <8 x i16> %a" when the source already lowers to <8 x i16> (u16/p16).
// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}
16429
// vreinterpretq_s32_*: reinterpret each 128-bit NEON vector type as
// int32x4_t (IR type <4 x i32>). Expected IR: one bitcast, or a bare
// "ret <4 x i32> %a" when the source already lowers to <4 x i32> (u32).
// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}
16519
// vreinterpretq_s64_*: reinterpret each 128-bit NEON vector type as
// int64x2_t (IR type <2 x i64>). Expected IR: one bitcast, or a bare
// "ret <2 x i64> %a" when the source already lowers to <2 x i64> (u64/p64).
// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}
16608
// vreinterpretq_u8_*: reinterpret each 128-bit NEON vector type as
// uint8x16_t (IR type <16 x i8>). Expected IR: one bitcast, or a bare
// "ret <16 x i8> %a" when the source already lowers to <16 x i8> (s8/p8).
// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}
16697
// vreinterpretq_u16_*: reinterpret each 128-bit NEON vector type as
// uint16x8_t (IR type <8 x i16>). Expected IR: one bitcast, or a bare
// "ret <8 x i16> %a" when the source already lowers to <8 x i16> (s16/p16).
// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}
16786
// vreinterpretq_u32_*: reinterpret each 128-bit NEON vector type as
// uint32x4_t (IR type <4 x i32>). Expected IR: one bitcast, or a bare
// "ret <4 x i32> %a" when the source already lowers to <4 x i32> (s32).
// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}
16876
// vreinterpretq_u64_* tests: reinterpret each 128-bit vector type as
// uint64x2_t. Where the source type already lowers to <2 x i64>
// (int64x2_t, poly64x2_t), no bitcast is emitted and the argument is
// returned directly; otherwise a single `bitcast` to <2 x i64> is expected.
// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// s64 -> u64 is a no-op at the IR level: both are <2 x i64>.
// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// p64 -> u64 is a no-op at the IR level: both are <2 x i64>.
// CHECK-LABEL: @test_vreinterpretq_u64_p64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}
16965
// vreinterpretq_f16_* tests: reinterpret each 128-bit vector type as
// float16x8_t. No source type shares the <8 x half> IR layout, so every
// case expects exactly one `bitcast` to <8 x half>.
// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}
17056
// vreinterpretq_f32_* tests: reinterpret each 128-bit vector type as
// float32x4_t. Every case expects exactly one `bitcast` to <4 x float>.
// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}
17147
// vreinterpretq_f64_* tests: reinterpret each 128-bit vector type as
// float64x2_t. Every case expects exactly one `bitcast` to <2 x double>.
// CHECK-LABEL: @test_vreinterpretq_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}
17238
// vreinterpretq_p8_* tests: reinterpret each 128-bit vector type as
// poly8x16_t. int8x16_t and uint8x16_t already lower to <16 x i8>, so those
// two cases return %a directly; all others expect one `bitcast` to <16 x i8>.
// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}
17327
// vreinterpretq_p16_* tests: reinterpret each 128-bit vector type as
// poly16x8_t. int16x8_t and uint16x8_t already lower to <8 x i16>, so those
// two cases return %a directly; all others expect one `bitcast` to <8 x i16>.
// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}
17416
// vreinterpretq_p64_* tests: reinterpret each 128-bit vector type as
// poly64x2_t. int64x2_t and uint64x2_t already lower to <2 x i64>, so those
// two cases return %a directly; all others expect one `bitcast` to <2 x i64>.
// CHECK-LABEL: @test_vreinterpretq_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}
17505
// Scalar floating-point absolute difference: vabds_f32/vabdd_f64 should
// lower to a single call of the scalar (sisd) llvm.aarch64.sisd.fabd
// intrinsic at the matching width.
// CHECK-LABEL: @test_vabds_f32(
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}
17519
// vuqadd(q)_*: signed saturating accumulate of an unsigned value (SUQADD).
// Each variant should lower to a single call of llvm.aarch64.neon.suqadd at
// the matching vector type. The v1i64 case additionally emits bitcasts of
// the operands to <8 x i8> (an artifact of the generic 64-bit builtin
// lowering) which the CHECK lines match but the call itself ignores.
// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT: ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT: ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT: ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT: ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT: ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT: ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}
17579
// vsqadd(q)_*: unsigned saturating accumulate of a signed value (USQADD) —
// the mirror of the suqadd tests above. Each variant should lower to a
// single call of llvm.aarch64.neon.usqadd at the matching vector type; some
// variants also emit operand bitcasts that the CHECK lines match.
// CHECK-LABEL: @test_vsqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: @test_vsqadd_u8(
// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vsqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}
17629
17630 // CHECK-LABEL: @test_vsqaddq_u32(
17631 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
17632 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
17633 // CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
17634 // CHECK: ret <4 x i32> [[VSQADD2_I]]
test_vsqaddq_u32(uint32x4_t a,int32x4_t b)17635 uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
17636 return vsqaddq_u32(a, b);
17637 }
17638
17639 // CHECK-LABEL: @test_vsqaddq_u64(
17640 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
17641 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
17642 // CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
17643 // CHECK: ret <2 x i64> [[VSQADD2_I]]
test_vsqaddq_u64(uint64x2_t a,int64x2_t b)17644 uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
17645 return vsqaddq_u64(a, b);
17646 }
17647
17648 // CHECK-LABEL: @test_vabs_s64(
17649 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17650 // CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
17651 // CHECK: ret <1 x i64> [[VABS1_I]]
test_vabs_s64(int64x1_t a)17652 int64x1_t test_vabs_s64(int64x1_t a) {
17653 return vabs_s64(a);
17654 }
17655
17656 // CHECK-LABEL: @test_vqabs_s64(
17657 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17658 // CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
17659 // CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
17660 // CHECK: ret <1 x i64> [[VQABS_V1_I]]
test_vqabs_s64(int64x1_t a)17661 int64x1_t test_vqabs_s64(int64x1_t a) {
17662 return vqabs_s64(a);
17663 }
17664
17665 // CHECK-LABEL: @test_vqneg_s64(
17666 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17667 // CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
17668 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
17669 // CHECK: ret <1 x i64> [[VQNEG_V1_I]]
test_vqneg_s64(int64x1_t a)17670 int64x1_t test_vqneg_s64(int64x1_t a) {
17671 return vqneg_s64(a);
17672 }
17673
17674 // CHECK-LABEL: @test_vneg_s64(
17675 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
17676 // CHECK: ret <1 x i64> [[SUB_I]]
test_vneg_s64(int64x1_t a)17677 int64x1_t test_vneg_s64(int64x1_t a) {
17678 return vneg_s64(a);
17679 }
17680
17681 // CHECK-LABEL: @test_vaddv_f32(
17682 // CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
17683 // CHECK: ret float [[VADDV_F32_I]]
test_vaddv_f32(float32x2_t a)17684 float32_t test_vaddv_f32(float32x2_t a) {
17685 return vaddv_f32(a);
17686 }
17687
17688 // CHECK-LABEL: @test_vaddvq_f32(
17689 // CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
17690 // CHECK: ret float [[VADDVQ_F32_I]]
test_vaddvq_f32(float32x4_t a)17691 float32_t test_vaddvq_f32(float32x4_t a) {
17692 return vaddvq_f32(a);
17693 }
17694
17695 // CHECK-LABEL: @test_vaddvq_f64(
17696 // CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
17697 // CHECK: ret double [[VADDVQ_F64_I]]
test_vaddvq_f64(float64x2_t a)17698 float64_t test_vaddvq_f64(float64x2_t a) {
17699 return vaddvq_f64(a);
17700 }
17701
17702 // CHECK-LABEL: @test_vmaxv_f32(
17703 // CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
17704 // CHECK: ret float [[VMAXV_F32_I]]
test_vmaxv_f32(float32x2_t a)17705 float32_t test_vmaxv_f32(float32x2_t a) {
17706 return vmaxv_f32(a);
17707 }
17708
17709 // CHECK-LABEL: @test_vmaxvq_f64(
17710 // CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
17711 // CHECK: ret double [[VMAXVQ_F64_I]]
test_vmaxvq_f64(float64x2_t a)17712 float64_t test_vmaxvq_f64(float64x2_t a) {
17713 return vmaxvq_f64(a);
17714 }
17715
17716 // CHECK-LABEL: @test_vminv_f32(
17717 // CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
17718 // CHECK: ret float [[VMINV_F32_I]]
test_vminv_f32(float32x2_t a)17719 float32_t test_vminv_f32(float32x2_t a) {
17720 return vminv_f32(a);
17721 }
17722
17723 // CHECK-LABEL: @test_vminvq_f64(
17724 // CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
17725 // CHECK: ret double [[VMINVQ_F64_I]]
test_vminvq_f64(float64x2_t a)17726 float64_t test_vminvq_f64(float64x2_t a) {
17727 return vminvq_f64(a);
17728 }
17729
17730 // CHECK-LABEL: @test_vmaxnmvq_f64(
17731 // CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
17732 // CHECK: ret double [[VMAXNMVQ_F64_I]]
test_vmaxnmvq_f64(float64x2_t a)17733 float64_t test_vmaxnmvq_f64(float64x2_t a) {
17734 return vmaxnmvq_f64(a);
17735 }
17736
17737 // CHECK-LABEL: @test_vmaxnmv_f32(
17738 // CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
17739 // CHECK: ret float [[VMAXNMV_F32_I]]
test_vmaxnmv_f32(float32x2_t a)17740 float32_t test_vmaxnmv_f32(float32x2_t a) {
17741 return vmaxnmv_f32(a);
17742 }
17743
17744 // CHECK-LABEL: @test_vminnmvq_f64(
17745 // CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
17746 // CHECK: ret double [[VMINNMVQ_F64_I]]
test_vminnmvq_f64(float64x2_t a)17747 float64_t test_vminnmvq_f64(float64x2_t a) {
17748 return vminnmvq_f64(a);
17749 }
17750
17751 // CHECK-LABEL: @test_vminnmv_f32(
17752 // CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
17753 // CHECK: ret float [[VMINNMV_F32_I]]
test_vminnmv_f32(float32x2_t a)17754 float32_t test_vminnmv_f32(float32x2_t a) {
17755 return vminnmv_f32(a);
17756 }
17757
17758 // CHECK-LABEL: @test_vpaddq_s64(
17759 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
17760 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
17761 // CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
test_vpaddq_s64(int64x2_t a,int64x2_t b)17762 int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
17763 return vpaddq_s64(a, b);
17764 }
17765
17766 // CHECK-LABEL: @test_vpaddq_u64(
17767 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
17768 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
17769 // CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
test_vpaddq_u64(uint64x2_t a,uint64x2_t b)17770 uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
17771 return vpaddq_u64(a, b);
17772 }
17773
17774 // CHECK-LABEL: @test_vpaddd_u64(
17775 // CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
17776 // CHECK: ret i64 [[VPADDD_U64_I]]
test_vpaddd_u64(uint64x2_t a)17777 uint64_t test_vpaddd_u64(uint64x2_t a) {
17778 return vpaddd_u64(a);
17779 }
17780
17781 // CHECK-LABEL: @test_vaddvq_s64(
17782 // CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
17783 // CHECK: ret i64 [[VADDVQ_S64_I]]
test_vaddvq_s64(int64x2_t a)17784 int64_t test_vaddvq_s64(int64x2_t a) {
17785 return vaddvq_s64(a);
17786 }
17787
17788 // CHECK-LABEL: @test_vaddvq_u64(
17789 // CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
17790 // CHECK: ret i64 [[VADDVQ_U64_I]]
test_vaddvq_u64(uint64x2_t a)17791 uint64_t test_vaddvq_u64(uint64x2_t a) {
17792 return vaddvq_u64(a);
17793 }
17794
17795 // CHECK-LABEL: @test_vadd_f64(
17796 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
17797 // CHECK: ret <1 x double> [[ADD_I]]
test_vadd_f64(float64x1_t a,float64x1_t b)17798 float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
17799 return vadd_f64(a, b);
17800 }
17801
17802 // CHECK-LABEL: @test_vmul_f64(
17803 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
17804 // CHECK: ret <1 x double> [[MUL_I]]
test_vmul_f64(float64x1_t a,float64x1_t b)17805 float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
17806 return vmul_f64(a, b);
17807 }
17808
17809 // CHECK-LABEL: @test_vdiv_f64(
17810 // CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
17811 // CHECK: ret <1 x double> [[DIV_I]]
test_vdiv_f64(float64x1_t a,float64x1_t b)17812 float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
17813 return vdiv_f64(a, b);
17814 }
17815
17816 // CHECK-LABEL: @test_vmla_f64(
17817 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17818 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
17819 // CHECK: ret <1 x double> [[ADD_I]]
test_vmla_f64(float64x1_t a,float64x1_t b,float64x1_t c)17820 float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17821 return vmla_f64(a, b, c);
17822 }
17823
17824 // CHECK-LABEL: @test_vmls_f64(
17825 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17826 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
17827 // CHECK: ret <1 x double> [[SUB_I]]
test_vmls_f64(float64x1_t a,float64x1_t b,float64x1_t c)17828 float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17829 return vmls_f64(a, b, c);
17830 }
17831
17832 // CHECK-LABEL: @test_vfma_f64(
17833 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17834 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17835 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17836 // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
17837 // CHECK: ret <1 x double> [[TMP3]]
test_vfma_f64(float64x1_t a,float64x1_t b,float64x1_t c)17838 float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17839 return vfma_f64(a, b, c);
17840 }
17841
17842 // CHECK-LABEL: @test_vfms_f64(
17843 // CHECK: [[SUB_I:%.*]] = fneg <1 x double> %b
17844 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17845 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
17846 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17847 // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
17848 // CHECK: ret <1 x double> [[TMP3]]
test_vfms_f64(float64x1_t a,float64x1_t b,float64x1_t c)17849 float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17850 return vfms_f64(a, b, c);
17851 }
17852
17853 // CHECK-LABEL: @test_vsub_f64(
17854 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
17855 // CHECK: ret <1 x double> [[SUB_I]]
test_vsub_f64(float64x1_t a,float64x1_t b)17856 float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
17857 return vsub_f64(a, b);
17858 }
17859
17860 // CHECK-LABEL: @test_vabd_f64(
17861 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17862 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17863 // CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
17864 // CHECK: ret <1 x double> [[VABD2_I]]
test_vabd_f64(float64x1_t a,float64x1_t b)17865 float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
17866 return vabd_f64(a, b);
17867 }
17868
17869 // CHECK-LABEL: @test_vmax_f64(
17870 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17871 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17872 // CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
17873 // CHECK: ret <1 x double> [[VMAX2_I]]
test_vmax_f64(float64x1_t a,float64x1_t b)17874 float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
17875 return vmax_f64(a, b);
17876 }
17877
17878 // CHECK-LABEL: @test_vmin_f64(
17879 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17880 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17881 // CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
17882 // CHECK: ret <1 x double> [[VMIN2_I]]
test_vmin_f64(float64x1_t a,float64x1_t b)17883 float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
17884 return vmin_f64(a, b);
17885 }
17886
17887 // CHECK-LABEL: @test_vmaxnm_f64(
17888 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17889 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17890 // CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
17891 // CHECK: ret <1 x double> [[VMAXNM2_I]]
test_vmaxnm_f64(float64x1_t a,float64x1_t b)17892 float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
17893 return vmaxnm_f64(a, b);
17894 }
17895
17896 // CHECK-LABEL: @test_vminnm_f64(
17897 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17898 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17899 // CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
17900 // CHECK: ret <1 x double> [[VMINNM2_I]]
test_vminnm_f64(float64x1_t a,float64x1_t b)17901 float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
17902 return vminnm_f64(a, b);
17903 }
17904
17905 // CHECK-LABEL: @test_vabs_f64(
17906 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17907 // CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
17908 // CHECK: ret <1 x double> [[VABS1_I]]
test_vabs_f64(float64x1_t a)17909 float64x1_t test_vabs_f64(float64x1_t a) {
17910 return vabs_f64(a);
17911 }
17912
17913 // CHECK-LABEL: @test_vneg_f64(
17914 // CHECK: [[SUB_I:%.*]] = fneg <1 x double> %a
17915 // CHECK: ret <1 x double> [[SUB_I]]
test_vneg_f64(float64x1_t a)17916 float64x1_t test_vneg_f64(float64x1_t a) {
17917 return vneg_f64(a);
17918 }
17919
17920 // CHECK-LABEL: @test_vcvt_s64_f64(
17921 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17922 // CHECK: [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
17923 // CHECK: ret <1 x i64> [[TMP1]]
test_vcvt_s64_f64(float64x1_t a)17924 int64x1_t test_vcvt_s64_f64(float64x1_t a) {
17925 return vcvt_s64_f64(a);
17926 }
17927
17928 // CHECK-LABEL: @test_vcvt_u64_f64(
17929 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17930 // CHECK: [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
17931 // CHECK: ret <1 x i64> [[TMP1]]
test_vcvt_u64_f64(float64x1_t a)17932 uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
17933 return vcvt_u64_f64(a);
17934 }
17935
17936 // CHECK-LABEL: @test_vcvtn_s64_f64(
17937 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17938 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
17939 // CHECK: ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_s64_f64(float64x1_t a)17940 int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
17941 return vcvtn_s64_f64(a);
17942 }
17943
17944 // CHECK-LABEL: @test_vcvtn_u64_f64(
17945 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17946 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
17947 // CHECK: ret <1 x i64> [[VCVTN1_I]]
test_vcvtn_u64_f64(float64x1_t a)17948 uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
17949 return vcvtn_u64_f64(a);
17950 }
17951
17952 // CHECK-LABEL: @test_vcvtp_s64_f64(
17953 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17954 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
17955 // CHECK: ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_s64_f64(float64x1_t a)17956 int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
17957 return vcvtp_s64_f64(a);
17958 }
17959
17960 // CHECK-LABEL: @test_vcvtp_u64_f64(
17961 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17962 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
17963 // CHECK: ret <1 x i64> [[VCVTP1_I]]
test_vcvtp_u64_f64(float64x1_t a)17964 uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
17965 return vcvtp_u64_f64(a);
17966 }
17967
17968 // CHECK-LABEL: @test_vcvtm_s64_f64(
17969 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17970 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
17971 // CHECK: ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_s64_f64(float64x1_t a)17972 int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
17973 return vcvtm_s64_f64(a);
17974 }
17975
17976 // CHECK-LABEL: @test_vcvtm_u64_f64(
17977 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17978 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
17979 // CHECK: ret <1 x i64> [[VCVTM1_I]]
test_vcvtm_u64_f64(float64x1_t a)17980 uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
17981 return vcvtm_u64_f64(a);
17982 }
17983
17984 // CHECK-LABEL: @test_vcvta_s64_f64(
17985 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17986 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
17987 // CHECK: ret <1 x i64> [[VCVTA1_I]]
test_vcvta_s64_f64(float64x1_t a)17988 int64x1_t test_vcvta_s64_f64(float64x1_t a) {
17989 return vcvta_s64_f64(a);
17990 }
17991
17992 // CHECK-LABEL: @test_vcvta_u64_f64(
17993 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17994 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
17995 // CHECK: ret <1 x i64> [[VCVTA1_I]]
test_vcvta_u64_f64(float64x1_t a)17996 uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
17997 return vcvta_u64_f64(a);
17998 }
17999
18000 // CHECK-LABEL: @test_vcvt_f64_s64(
18001 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18002 // CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
18003 // CHECK: ret <1 x double> [[VCVT_I]]
test_vcvt_f64_s64(int64x1_t a)18004 float64x1_t test_vcvt_f64_s64(int64x1_t a) {
18005 return vcvt_f64_s64(a);
18006 }
18007
18008 // CHECK-LABEL: @test_vcvt_f64_u64(
18009 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18010 // CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
18011 // CHECK: ret <1 x double> [[VCVT_I]]
test_vcvt_f64_u64(uint64x1_t a)18012 float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
18013 return vcvt_f64_u64(a);
18014 }
18015
18016 // CHECK-LABEL: @test_vcvt_n_s64_f64(
18017 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18018 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
18019 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
18020 // CHECK: ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_s64_f64(float64x1_t a)18021 int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
18022 return vcvt_n_s64_f64(a, 64);
18023 }
18024
18025 // CHECK-LABEL: @test_vcvt_n_u64_f64(
18026 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18027 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
18028 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
18029 // CHECK: ret <1 x i64> [[VCVT_N1]]
test_vcvt_n_u64_f64(float64x1_t a)18030 uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
18031 return vcvt_n_u64_f64(a, 64);
18032 }
18033
18034 // CHECK-LABEL: @test_vcvt_n_f64_s64(
18035 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18036 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18037 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
18038 // CHECK: ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_s64(int64x1_t a)18039 float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
18040 return vcvt_n_f64_s64(a, 64);
18041 }
18042
18043 // CHECK-LABEL: @test_vcvt_n_f64_u64(
18044 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
18045 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
18046 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
18047 // CHECK: ret <1 x double> [[VCVT_N1]]
test_vcvt_n_f64_u64(uint64x1_t a)18048 float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
18049 return vcvt_n_f64_u64(a, 64);
18050 }
18051
18052 // CHECK-LABEL: @test_vrndn_f64(
18053 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18054 // CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
18055 // CHECK: ret <1 x double> [[VRNDN1_I]]
test_vrndn_f64(float64x1_t a)18056 float64x1_t test_vrndn_f64(float64x1_t a) {
18057 return vrndn_f64(a);
18058 }
18059
18060 // CHECK-LABEL: @test_vrnda_f64(
18061 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18062 // CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
18063 // CHECK: ret <1 x double> [[VRNDA1_I]]
test_vrnda_f64(float64x1_t a)18064 float64x1_t test_vrnda_f64(float64x1_t a) {
18065 return vrnda_f64(a);
18066 }
18067
18068 // CHECK-LABEL: @test_vrndp_f64(
18069 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18070 // CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
18071 // CHECK: ret <1 x double> [[VRNDP1_I]]
test_vrndp_f64(float64x1_t a)18072 float64x1_t test_vrndp_f64(float64x1_t a) {
18073 return vrndp_f64(a);
18074 }
18075
18076 // CHECK-LABEL: @test_vrndm_f64(
18077 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18078 // CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
18079 // CHECK: ret <1 x double> [[VRNDM1_I]]
test_vrndm_f64(float64x1_t a)18080 float64x1_t test_vrndm_f64(float64x1_t a) {
18081 return vrndm_f64(a);
18082 }
18083
18084 // CHECK-LABEL: @test_vrndx_f64(
18085 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18086 // CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
18087 // CHECK: ret <1 x double> [[VRNDX1_I]]
test_vrndx_f64(float64x1_t a)18088 float64x1_t test_vrndx_f64(float64x1_t a) {
18089 return vrndx_f64(a);
18090 }
18091
18092 // CHECK-LABEL: @test_vrnd_f64(
18093 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18094 // CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
18095 // CHECK: ret <1 x double> [[VRNDZ1_I]]
test_vrnd_f64(float64x1_t a)18096 float64x1_t test_vrnd_f64(float64x1_t a) {
18097 return vrnd_f64(a);
18098 }
18099
18100 // CHECK-LABEL: @test_vrndi_f64(
18101 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18102 // CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
18103 // CHECK: ret <1 x double> [[VRNDI1_I]]
test_vrndi_f64(float64x1_t a)18104 float64x1_t test_vrndi_f64(float64x1_t a) {
18105 return vrndi_f64(a);
18106 }
18107
18108 // CHECK-LABEL: @test_vrsqrte_f64(
18109 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18110 // CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
18111 // CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
test_vrsqrte_f64(float64x1_t a)18112 float64x1_t test_vrsqrte_f64(float64x1_t a) {
18113 return vrsqrte_f64(a);
18114 }
18115
18116 // CHECK-LABEL: @test_vrecpe_f64(
18117 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18118 // CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
18119 // CHECK: ret <1 x double> [[VRECPE_V1_I]]
test_vrecpe_f64(float64x1_t a)18120 float64x1_t test_vrecpe_f64(float64x1_t a) {
18121 return vrecpe_f64(a);
18122 }
18123
18124 // CHECK-LABEL: @test_vsqrt_f64(
18125 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18126 // CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
18127 // CHECK: ret <1 x double> [[VSQRT_I]]
test_vsqrt_f64(float64x1_t a)18128 float64x1_t test_vsqrt_f64(float64x1_t a) {
18129 return vsqrt_f64(a);
18130 }
18131
18132 // CHECK-LABEL: @test_vrecps_f64(
18133 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18134 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
18135 // CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
18136 // CHECK: ret <1 x double> [[VRECPS_V2_I]]
test_vrecps_f64(float64x1_t a,float64x1_t b)18137 float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
18138 return vrecps_f64(a, b);
18139 }
18140
18141 // CHECK-LABEL: @test_vrsqrts_f64(
18142 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
18143 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
18144 // CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
18145 // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
18146 // CHECK: ret <1 x double> [[VRSQRTS_V2_I]]
test_vrsqrts_f64(float64x1_t a,float64x1_t b)18147 float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
18148 return vrsqrts_f64(a, b);
18149 }
18150
18151 // CHECK-LABEL: @test_vminv_s32(
18152 // CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
18153 // CHECK: ret i32 [[VMINV_S32_I]]
test_vminv_s32(int32x2_t a)18154 int32_t test_vminv_s32(int32x2_t a) {
18155 return vminv_s32(a);
18156 }
18157
18158 // CHECK-LABEL: @test_vminv_u32(
18159 // CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
18160 // CHECK: ret i32 [[VMINV_U32_I]]
test_vminv_u32(uint32x2_t a)18161 uint32_t test_vminv_u32(uint32x2_t a) {
18162 return vminv_u32(a);
18163 }
18164
18165 // CHECK-LABEL: @test_vmaxv_s32(
18166 // CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
18167 // CHECK: ret i32 [[VMAXV_S32_I]]
test_vmaxv_s32(int32x2_t a)18168 int32_t test_vmaxv_s32(int32x2_t a) {
18169 return vmaxv_s32(a);
18170 }
18171
18172 // CHECK-LABEL: @test_vmaxv_u32(
18173 // CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
18174 // CHECK: ret i32 [[VMAXV_U32_I]]
test_vmaxv_u32(uint32x2_t a)18175 uint32_t test_vmaxv_u32(uint32x2_t a) {
18176 return vmaxv_u32(a);
18177 }
18178
18179 // CHECK-LABEL: @test_vaddv_s32(
18180 // CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
18181 // CHECK: ret i32 [[VADDV_S32_I]]
test_vaddv_s32(int32x2_t a)18182 int32_t test_vaddv_s32(int32x2_t a) {
18183 return vaddv_s32(a);
18184 }
18185
18186 // CHECK-LABEL: @test_vaddv_u32(
18187 // CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
18188 // CHECK: ret i32 [[VADDV_U32_I]]
test_vaddv_u32(uint32x2_t a)18189 uint32_t test_vaddv_u32(uint32x2_t a) {
18190 return vaddv_u32(a);
18191 }
18192
18193 // CHECK-LABEL: @test_vaddlv_s32(
18194 // CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
18195 // CHECK: ret i64 [[VADDLV_S32_I]]
test_vaddlv_s32(int32x2_t a)18196 int64_t test_vaddlv_s32(int32x2_t a) {
18197 return vaddlv_s32(a);
18198 }
18199
18200 // CHECK-LABEL: @test_vaddlv_u32(
18201 // CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
18202 // CHECK: ret i64 [[VADDLV_U32_I]]
test_vaddlv_u32(uint32x2_t a)18203 uint64_t test_vaddlv_u32(uint32x2_t a) {
18204 return vaddlv_u32(a);
18205 }
18206