// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
// RUN: -target-cpu swift -fallow-half-arguments-and-returns \
// RUN: -target-feature +fullfp16 -ffreestanding \
// RUN: -flax-vector-conversions=none \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>

10 // CHECK-LABEL: @test_vaba_s8(
11 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
12 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
13 // CHECK: ret <8 x i8> [[ADD_I]]
test_vaba_s8(int8x8_t a,int8x8_t b,int8x8_t c)14 int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
15 return vaba_s8(a, b, c);
16 }
17
18 // CHECK-LABEL: @test_vaba_s16(
19 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
21 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
22 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
23 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
24 // CHECK: ret <4 x i16> [[ADD_I]]
test_vaba_s16(int16x4_t a,int16x4_t b,int16x4_t c)25 int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
26 return vaba_s16(a, b, c);
27 }
28
29 // CHECK-LABEL: @test_vaba_s32(
30 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
31 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
32 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
33 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
34 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
35 // CHECK: ret <2 x i32> [[ADD_I]]
test_vaba_s32(int32x2_t a,int32x2_t b,int32x2_t c)36 int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
37 return vaba_s32(a, b, c);
38 }
39
40 // CHECK-LABEL: @test_vaba_u8(
41 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
42 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
43 // CHECK: ret <8 x i8> [[ADD_I]]
test_vaba_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)44 uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
45 return vaba_u8(a, b, c);
46 }
47
48 // CHECK-LABEL: @test_vaba_u16(
49 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
50 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
51 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
52 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
53 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
54 // CHECK: ret <4 x i16> [[ADD_I]]
test_vaba_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)55 uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
56 return vaba_u16(a, b, c);
57 }
58
59 // CHECK-LABEL: @test_vaba_u32(
60 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
61 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
62 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
63 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
64 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
65 // CHECK: ret <2 x i32> [[ADD_I]]
test_vaba_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)66 uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
67 return vaba_u32(a, b, c);
68 }
69
70 // CHECK-LABEL: @test_vabaq_s8(
71 // CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c)
72 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
73 // CHECK: ret <16 x i8> [[ADD_I]]
test_vabaq_s8(int8x16_t a,int8x16_t b,int8x16_t c)74 int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
75 return vabaq_s8(a, b, c);
76 }
77
78 // CHECK-LABEL: @test_vabaq_s16(
79 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
80 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
81 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c)
82 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
83 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
84 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabaq_s16(int16x8_t a,int16x8_t b,int16x8_t c)85 int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
86 return vabaq_s16(a, b, c);
87 }
88
89 // CHECK-LABEL: @test_vabaq_s32(
90 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
91 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
92 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c)
93 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
94 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
95 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabaq_s32(int32x4_t a,int32x4_t b,int32x4_t c)96 int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
97 return vabaq_s32(a, b, c);
98 }
99
100 // CHECK-LABEL: @test_vabaq_u8(
101 // CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c)
102 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
103 // CHECK: ret <16 x i8> [[ADD_I]]
test_vabaq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)104 uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
105 return vabaq_u8(a, b, c);
106 }
107
108 // CHECK-LABEL: @test_vabaq_u16(
109 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
110 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
111 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c)
112 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
113 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
114 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabaq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)115 uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
116 return vabaq_u16(a, b, c);
117 }
118
119 // CHECK-LABEL: @test_vabaq_u32(
120 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
121 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
122 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c)
123 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
124 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
125 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabaq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)126 uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
127 return vabaq_u32(a, b, c);
128 }
129
130 // CHECK-LABEL: @test_vabal_s8(
131 // CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
132 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
133 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
134 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabal_s8(int16x8_t a,int8x8_t b,int8x8_t c)135 int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
136 return vabal_s8(a, b, c);
137 }
138
139 // CHECK-LABEL: @test_vabal_s16(
140 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
141 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
142 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
143 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
144 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
145 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
146 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
147 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabal_s16(int32x4_t a,int16x4_t b,int16x4_t c)148 int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
149 return vabal_s16(a, b, c);
150 }
151
152 // CHECK-LABEL: @test_vabal_s32(
153 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
154 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
155 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
156 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
157 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
158 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
159 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
160 // CHECK: ret <2 x i64> [[ADD_I]]
test_vabal_s32(int64x2_t a,int32x2_t b,int32x2_t c)161 int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
162 return vabal_s32(a, b, c);
163 }
164
165 // CHECK-LABEL: @test_vabal_u8(
166 // CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
167 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
168 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
169 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabal_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)170 uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
171 return vabal_u8(a, b, c);
172 }
173
174 // CHECK-LABEL: @test_vabal_u16(
175 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
176 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
177 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
178 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
179 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
180 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
181 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
182 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabal_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)183 uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
184 return vabal_u16(a, b, c);
185 }
186
187 // CHECK-LABEL: @test_vabal_u32(
188 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
189 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
190 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
191 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
192 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
193 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
194 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
195 // CHECK: ret <2 x i64> [[ADD_I]]
test_vabal_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)196 uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
197 return vabal_u32(a, b, c);
198 }
199
200 // CHECK-LABEL: @test_vabd_s8(
201 // CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
202 // CHECK: ret <8 x i8> [[VABD_V_I]]
test_vabd_s8(int8x8_t a,int8x8_t b)203 int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
204 return vabd_s8(a, b);
205 }
206
207 // CHECK-LABEL: @test_vabd_s16(
208 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
209 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
210 // CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
211 // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
212 // CHECK: ret <4 x i16> [[VABD_V2_I]]
test_vabd_s16(int16x4_t a,int16x4_t b)213 int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
214 return vabd_s16(a, b);
215 }
216
217 // CHECK-LABEL: @test_vabd_s32(
218 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
219 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
220 // CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
221 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
222 // CHECK: ret <2 x i32> [[VABD_V2_I]]
test_vabd_s32(int32x2_t a,int32x2_t b)223 int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
224 return vabd_s32(a, b);
225 }
226
227 // CHECK-LABEL: @test_vabd_u8(
228 // CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
229 // CHECK: ret <8 x i8> [[VABD_V_I]]
test_vabd_u8(uint8x8_t a,uint8x8_t b)230 uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
231 return vabd_u8(a, b);
232 }
233
234 // CHECK-LABEL: @test_vabd_u16(
235 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
236 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
237 // CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
238 // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
239 // CHECK: ret <4 x i16> [[VABD_V2_I]]
test_vabd_u16(uint16x4_t a,uint16x4_t b)240 uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
241 return vabd_u16(a, b);
242 }
243
244 // CHECK-LABEL: @test_vabd_u32(
245 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
246 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
247 // CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
248 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
249 // CHECK: ret <2 x i32> [[VABD_V2_I]]
test_vabd_u32(uint32x2_t a,uint32x2_t b)250 uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
251 return vabd_u32(a, b);
252 }
253
254 // CHECK-LABEL: @test_vabd_f32(
255 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
256 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
257 // CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b)
258 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
259 // CHECK: ret <2 x float> [[VABD_V2_I]]
test_vabd_f32(float32x2_t a,float32x2_t b)260 float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
261 return vabd_f32(a, b);
262 }
263
264 // CHECK-LABEL: @test_vabdq_s8(
265 // CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b)
266 // CHECK: ret <16 x i8> [[VABDQ_V_I]]
test_vabdq_s8(int8x16_t a,int8x16_t b)267 int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
268 return vabdq_s8(a, b);
269 }
270
271 // CHECK-LABEL: @test_vabdq_s16(
272 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
273 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
274 // CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b)
275 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
276 // CHECK: ret <8 x i16> [[VABDQ_V2_I]]
test_vabdq_s16(int16x8_t a,int16x8_t b)277 int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
278 return vabdq_s16(a, b);
279 }
280
281 // CHECK-LABEL: @test_vabdq_s32(
282 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
283 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
284 // CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b)
285 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
286 // CHECK: ret <4 x i32> [[VABDQ_V2_I]]
test_vabdq_s32(int32x4_t a,int32x4_t b)287 int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
288 return vabdq_s32(a, b);
289 }
290
291 // CHECK-LABEL: @test_vabdq_u8(
292 // CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b)
293 // CHECK: ret <16 x i8> [[VABDQ_V_I]]
test_vabdq_u8(uint8x16_t a,uint8x16_t b)294 uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
295 return vabdq_u8(a, b);
296 }
297
298 // CHECK-LABEL: @test_vabdq_u16(
299 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
300 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
301 // CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b)
302 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
303 // CHECK: ret <8 x i16> [[VABDQ_V2_I]]
test_vabdq_u16(uint16x8_t a,uint16x8_t b)304 uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
305 return vabdq_u16(a, b);
306 }
307
308 // CHECK-LABEL: @test_vabdq_u32(
309 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
310 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
311 // CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b)
312 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
313 // CHECK: ret <4 x i32> [[VABDQ_V2_I]]
test_vabdq_u32(uint32x4_t a,uint32x4_t b)314 uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
315 return vabdq_u32(a, b);
316 }
317
318 // CHECK-LABEL: @test_vabdq_f32(
319 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
320 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
321 // CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b)
322 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
323 // CHECK: ret <4 x float> [[VABDQ_V2_I]]
test_vabdq_f32(float32x4_t a,float32x4_t b)324 float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
325 return vabdq_f32(a, b);
326 }
327
328 // CHECK-LABEL: @test_vabdl_s8(
329 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
330 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
331 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
test_vabdl_s8(int8x8_t a,int8x8_t b)332 int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
333 return vabdl_s8(a, b);
334 }
335
336 // CHECK-LABEL: @test_vabdl_s16(
337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
339 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
340 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
341 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
342 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
343 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
test_vabdl_s16(int16x4_t a,int16x4_t b)344 int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
345 return vabdl_s16(a, b);
346 }
347
348 // CHECK-LABEL: @test_vabdl_s32(
349 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
350 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
351 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
352 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
353 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
354 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
355 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
test_vabdl_s32(int32x2_t a,int32x2_t b)356 int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
357 return vabdl_s32(a, b);
358 }
359
360 // CHECK-LABEL: @test_vabdl_u8(
361 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
362 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
363 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
test_vabdl_u8(uint8x8_t a,uint8x8_t b)364 uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
365 return vabdl_u8(a, b);
366 }
367
368 // CHECK-LABEL: @test_vabdl_u16(
369 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
370 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
371 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
372 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
373 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
374 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
375 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
test_vabdl_u16(uint16x4_t a,uint16x4_t b)376 uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
377 return vabdl_u16(a, b);
378 }
379
380 // CHECK-LABEL: @test_vabdl_u32(
381 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
382 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
383 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
384 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
385 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
386 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
387 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
test_vabdl_u32(uint32x2_t a,uint32x2_t b)388 uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
389 return vabdl_u32(a, b);
390 }
391
392 // CHECK-LABEL: @test_vabs_s8(
393 // CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a)
394 // CHECK: ret <8 x i8> [[VABS_I]]
test_vabs_s8(int8x8_t a)395 int8x8_t test_vabs_s8(int8x8_t a) {
396 return vabs_s8(a);
397 }
398
399 // CHECK-LABEL: @test_vabs_s16(
400 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
401 // CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a)
402 // CHECK: ret <4 x i16> [[VABS1_I]]
test_vabs_s16(int16x4_t a)403 int16x4_t test_vabs_s16(int16x4_t a) {
404 return vabs_s16(a);
405 }
406
407 // CHECK-LABEL: @test_vabs_s32(
408 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
409 // CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a)
410 // CHECK: ret <2 x i32> [[VABS1_I]]
test_vabs_s32(int32x2_t a)411 int32x2_t test_vabs_s32(int32x2_t a) {
412 return vabs_s32(a);
413 }
414
415 // CHECK-LABEL: @test_vabs_f32(
416 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
417 // CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
418 // CHECK: ret <2 x float> [[VABS1_I]]
test_vabs_f32(float32x2_t a)419 float32x2_t test_vabs_f32(float32x2_t a) {
420 return vabs_f32(a);
421 }
422
423 // CHECK-LABEL: @test_vabsq_s8(
424 // CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a)
425 // CHECK: ret <16 x i8> [[VABS_I]]
test_vabsq_s8(int8x16_t a)426 int8x16_t test_vabsq_s8(int8x16_t a) {
427 return vabsq_s8(a);
428 }
429
430 // CHECK-LABEL: @test_vabsq_s16(
431 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
432 // CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a)
433 // CHECK: ret <8 x i16> [[VABS1_I]]
test_vabsq_s16(int16x8_t a)434 int16x8_t test_vabsq_s16(int16x8_t a) {
435 return vabsq_s16(a);
436 }
437
438 // CHECK-LABEL: @test_vabsq_s32(
439 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
440 // CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a)
441 // CHECK: ret <4 x i32> [[VABS1_I]]
test_vabsq_s32(int32x4_t a)442 int32x4_t test_vabsq_s32(int32x4_t a) {
443 return vabsq_s32(a);
444 }
445
446 // CHECK-LABEL: @test_vabsq_f32(
447 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
448 // CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
449 // CHECK: ret <4 x float> [[VABS1_I]]
test_vabsq_f32(float32x4_t a)450 float32x4_t test_vabsq_f32(float32x4_t a) {
451 return vabsq_f32(a);
452 }
453
454 // CHECK-LABEL: @test_vadd_s8(
455 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
456 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_s8(int8x8_t a,int8x8_t b)457 int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
458 return vadd_s8(a, b);
459 }
460
461 // CHECK-LABEL: @test_vadd_s16(
462 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
463 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_s16(int16x4_t a,int16x4_t b)464 int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
465 return vadd_s16(a, b);
466 }
467
468 // CHECK-LABEL: @test_vadd_s32(
469 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
470 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_s32(int32x2_t a,int32x2_t b)471 int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
472 return vadd_s32(a, b);
473 }
474
475 // CHECK-LABEL: @test_vadd_s64(
476 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
477 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_s64(int64x1_t a,int64x1_t b)478 int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
479 return vadd_s64(a, b);
480 }
481
482 // CHECK-LABEL: @test_vadd_f32(
483 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b
484 // CHECK: ret <2 x float> [[ADD_I]]
test_vadd_f32(float32x2_t a,float32x2_t b)485 float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
486 return vadd_f32(a, b);
487 }
488
489 // CHECK-LABEL: @test_vadd_u8(
490 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
491 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_u8(uint8x8_t a,uint8x8_t b)492 uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
493 return vadd_u8(a, b);
494 }
495
496 // CHECK-LABEL: @test_vadd_u16(
497 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
498 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_u16(uint16x4_t a,uint16x4_t b)499 uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
500 return vadd_u16(a, b);
501 }
502
503 // CHECK-LABEL: @test_vadd_u32(
504 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
505 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_u32(uint32x2_t a,uint32x2_t b)506 uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
507 return vadd_u32(a, b);
508 }
509
510 // CHECK-LABEL: @test_vadd_u64(
511 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
512 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_u64(uint64x1_t a,uint64x1_t b)513 uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
514 return vadd_u64(a, b);
515 }
516
517 // CHECK-LABEL: @test_vaddq_s8(
518 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
519 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_s8(int8x16_t a,int8x16_t b)520 int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
521 return vaddq_s8(a, b);
522 }
523
524 // CHECK-LABEL: @test_vaddq_s16(
525 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
526 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_s16(int16x8_t a,int16x8_t b)527 int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
528 return vaddq_s16(a, b);
529 }
530
531 // CHECK-LABEL: @test_vaddq_s32(
532 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
533 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_s32(int32x4_t a,int32x4_t b)534 int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
535 return vaddq_s32(a, b);
536 }
537
538 // CHECK-LABEL: @test_vaddq_s64(
539 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
540 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_s64(int64x2_t a,int64x2_t b)541 int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
542 return vaddq_s64(a, b);
543 }
544
545 // CHECK-LABEL: @test_vaddq_f32(
546 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b
547 // CHECK: ret <4 x float> [[ADD_I]]
test_vaddq_f32(float32x4_t a,float32x4_t b)548 float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
549 return vaddq_f32(a, b);
550 }
551
552 // CHECK-LABEL: @test_vaddq_u8(
553 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
554 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_u8(uint8x16_t a,uint8x16_t b)555 uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
556 return vaddq_u8(a, b);
557 }
558
559 // CHECK-LABEL: @test_vaddq_u16(
560 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
561 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_u16(uint16x8_t a,uint16x8_t b)562 uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
563 return vaddq_u16(a, b);
564 }
565
566 // CHECK-LABEL: @test_vaddq_u32(
567 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
568 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_u32(uint32x4_t a,uint32x4_t b)569 uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
570 return vaddq_u32(a, b);
571 }
572
573 // CHECK-LABEL: @test_vaddq_u64(
574 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
575 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_u64(uint64x2_t a,uint64x2_t b)576 uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
577 return vaddq_u64(a, b);
578 }
579
580 // CHECK-LABEL: @test_vaddhn_s16(
581 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
582 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
583 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
584 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
585 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
586 // CHECK: ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_s16(int16x8_t a,int16x8_t b)587 int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
588 return vaddhn_s16(a, b);
589 }
590
591 // CHECK-LABEL: @test_vaddhn_s32(
592 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
593 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
594 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
595 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
596 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
597 // CHECK: ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_s32(int32x4_t a,int32x4_t b)598 int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
599 return vaddhn_s32(a, b);
600 }
601
602 // CHECK-LABEL: @test_vaddhn_s64(
603 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
604 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
605 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
606 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
607 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
608 // CHECK: ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_s64(int64x2_t a,int64x2_t b)609 int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
610 return vaddhn_s64(a, b);
611 }
612
613 // CHECK-LABEL: @test_vaddhn_u16(
614 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
615 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
616 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
617 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
618 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
619 // CHECK: ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_u16(uint16x8_t a,uint16x8_t b)620 uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
621 return vaddhn_u16(a, b);
622 }
623
624 // CHECK-LABEL: @test_vaddhn_u32(
625 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
626 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
627 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
628 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
629 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
630 // CHECK: ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_u32(uint32x4_t a,uint32x4_t b)631 uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
632 return vaddhn_u32(a, b);
633 }
634
635 // CHECK-LABEL: @test_vaddhn_u64(
636 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
637 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
638 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
639 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
640 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
641 // CHECK: ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_u64(uint64x2_t a,uint64x2_t b)642 uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
643 return vaddhn_u64(a, b);
644 }
645
646 // CHECK-LABEL: @test_vaddl_s8(
647 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
648 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
649 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
650 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddl_s8(int8x8_t a,int8x8_t b)651 int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
652 return vaddl_s8(a, b);
653 }
654
655 // CHECK-LABEL: @test_vaddl_s16(
656 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
657 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
658 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
659 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
660 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
661 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddl_s16(int16x4_t a,int16x4_t b)662 int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
663 return vaddl_s16(a, b);
664 }
665
666 // CHECK-LABEL: @test_vaddl_s32(
667 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
668 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
669 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
670 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
671 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
672 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddl_s32(int32x2_t a,int32x2_t b)673 int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
674 return vaddl_s32(a, b);
675 }
676
677 // CHECK-LABEL: @test_vaddl_u8(
678 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
679 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
680 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
681 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddl_u8(uint8x8_t a,uint8x8_t b)682 uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
683 return vaddl_u8(a, b);
684 }
685
686 // CHECK-LABEL: @test_vaddl_u16(
687 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
688 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
689 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
690 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
691 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
692 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddl_u16(uint16x4_t a,uint16x4_t b)693 uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
694 return vaddl_u16(a, b);
695 }
696
697 // CHECK-LABEL: @test_vaddl_u32(
698 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
699 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
700 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
701 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
702 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
703 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddl_u32(uint32x2_t a,uint32x2_t b)704 uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
705 return vaddl_u32(a, b);
706 }
707
708 // CHECK-LABEL: @test_vaddw_s8(
709 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
710 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
711 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddw_s8(int16x8_t a,int8x8_t b)712 int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
713 return vaddw_s8(a, b);
714 }
715
716 // CHECK-LABEL: @test_vaddw_s16(
717 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
718 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
719 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
720 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddw_s16(int32x4_t a,int16x4_t b)721 int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
722 return vaddw_s16(a, b);
723 }
724
725 // CHECK-LABEL: @test_vaddw_s32(
726 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
727 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
728 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
729 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddw_s32(int64x2_t a,int32x2_t b)730 int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
731 return vaddw_s32(a, b);
732 }
733
734 // CHECK-LABEL: @test_vaddw_u8(
735 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
736 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
737 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddw_u8(uint16x8_t a,uint8x8_t b)738 uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
739 return vaddw_u8(a, b);
740 }
741
742 // CHECK-LABEL: @test_vaddw_u16(
743 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
744 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
745 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
746 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddw_u16(uint32x4_t a,uint16x4_t b)747 uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
748 return vaddw_u16(a, b);
749 }
750
751 // CHECK-LABEL: @test_vaddw_u32(
752 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
753 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
754 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
755 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddw_u32(uint64x2_t a,uint32x2_t b)756 uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
757 return vaddw_u32(a, b);
758 }
759
760 // CHECK-LABEL: @test_vand_s8(
761 // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
762 // CHECK: ret <8 x i8> [[AND_I]]
test_vand_s8(int8x8_t a,int8x8_t b)763 int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
764 return vand_s8(a, b);
765 }
766
767 // CHECK-LABEL: @test_vand_s16(
768 // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
769 // CHECK: ret <4 x i16> [[AND_I]]
test_vand_s16(int16x4_t a,int16x4_t b)770 int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
771 return vand_s16(a, b);
772 }
773
774 // CHECK-LABEL: @test_vand_s32(
775 // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
776 // CHECK: ret <2 x i32> [[AND_I]]
test_vand_s32(int32x2_t a,int32x2_t b)777 int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
778 return vand_s32(a, b);
779 }
780
781 // CHECK-LABEL: @test_vand_s64(
782 // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
783 // CHECK: ret <1 x i64> [[AND_I]]
test_vand_s64(int64x1_t a,int64x1_t b)784 int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
785 return vand_s64(a, b);
786 }
787
788 // CHECK-LABEL: @test_vand_u8(
789 // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
790 // CHECK: ret <8 x i8> [[AND_I]]
test_vand_u8(uint8x8_t a,uint8x8_t b)791 uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
792 return vand_u8(a, b);
793 }
794
795 // CHECK-LABEL: @test_vand_u16(
796 // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
797 // CHECK: ret <4 x i16> [[AND_I]]
test_vand_u16(uint16x4_t a,uint16x4_t b)798 uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
799 return vand_u16(a, b);
800 }
801
802 // CHECK-LABEL: @test_vand_u32(
803 // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
804 // CHECK: ret <2 x i32> [[AND_I]]
test_vand_u32(uint32x2_t a,uint32x2_t b)805 uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
806 return vand_u32(a, b);
807 }
808
809 // CHECK-LABEL: @test_vand_u64(
810 // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
811 // CHECK: ret <1 x i64> [[AND_I]]
test_vand_u64(uint64x1_t a,uint64x1_t b)812 uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
813 return vand_u64(a, b);
814 }
815
816 // CHECK-LABEL: @test_vandq_s8(
817 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
818 // CHECK: ret <16 x i8> [[AND_I]]
test_vandq_s8(int8x16_t a,int8x16_t b)819 int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
820 return vandq_s8(a, b);
821 }
822
823 // CHECK-LABEL: @test_vandq_s16(
824 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
825 // CHECK: ret <8 x i16> [[AND_I]]
test_vandq_s16(int16x8_t a,int16x8_t b)826 int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
827 return vandq_s16(a, b);
828 }
829
830 // CHECK-LABEL: @test_vandq_s32(
831 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
832 // CHECK: ret <4 x i32> [[AND_I]]
test_vandq_s32(int32x4_t a,int32x4_t b)833 int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
834 return vandq_s32(a, b);
835 }
836
837 // CHECK-LABEL: @test_vandq_s64(
838 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
839 // CHECK: ret <2 x i64> [[AND_I]]
test_vandq_s64(int64x2_t a,int64x2_t b)840 int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
841 return vandq_s64(a, b);
842 }
843
844 // CHECK-LABEL: @test_vandq_u8(
845 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
846 // CHECK: ret <16 x i8> [[AND_I]]
test_vandq_u8(uint8x16_t a,uint8x16_t b)847 uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
848 return vandq_u8(a, b);
849 }
850
851 // CHECK-LABEL: @test_vandq_u16(
852 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
853 // CHECK: ret <8 x i16> [[AND_I]]
test_vandq_u16(uint16x8_t a,uint16x8_t b)854 uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
855 return vandq_u16(a, b);
856 }
857
858 // CHECK-LABEL: @test_vandq_u32(
859 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
860 // CHECK: ret <4 x i32> [[AND_I]]
test_vandq_u32(uint32x4_t a,uint32x4_t b)861 uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
862 return vandq_u32(a, b);
863 }
864
865 // CHECK-LABEL: @test_vandq_u64(
866 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
867 // CHECK: ret <2 x i64> [[AND_I]]
test_vandq_u64(uint64x2_t a,uint64x2_t b)868 uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
869 return vandq_u64(a, b);
870 }
871
872 // CHECK-LABEL: @test_vbic_s8(
873 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
874 // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
875 // CHECK: ret <8 x i8> [[AND_I]]
test_vbic_s8(int8x8_t a,int8x8_t b)876 int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
877 return vbic_s8(a, b);
878 }
879
880 // CHECK-LABEL: @test_vbic_s16(
881 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
882 // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
883 // CHECK: ret <4 x i16> [[AND_I]]
test_vbic_s16(int16x4_t a,int16x4_t b)884 int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
885 return vbic_s16(a, b);
886 }
887
888 // CHECK-LABEL: @test_vbic_s32(
889 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
890 // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
891 // CHECK: ret <2 x i32> [[AND_I]]
test_vbic_s32(int32x2_t a,int32x2_t b)892 int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
893 return vbic_s32(a, b);
894 }
895
896 // CHECK-LABEL: @test_vbic_s64(
897 // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
898 // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
899 // CHECK: ret <1 x i64> [[AND_I]]
test_vbic_s64(int64x1_t a,int64x1_t b)900 int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
901 return vbic_s64(a, b);
902 }
903
904 // CHECK-LABEL: @test_vbic_u8(
905 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
906 // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
907 // CHECK: ret <8 x i8> [[AND_I]]
test_vbic_u8(uint8x8_t a,uint8x8_t b)908 uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
909 return vbic_u8(a, b);
910 }
911
912 // CHECK-LABEL: @test_vbic_u16(
913 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
914 // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
915 // CHECK: ret <4 x i16> [[AND_I]]
test_vbic_u16(uint16x4_t a,uint16x4_t b)916 uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
917 return vbic_u16(a, b);
918 }
919
920 // CHECK-LABEL: @test_vbic_u32(
921 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
922 // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
923 // CHECK: ret <2 x i32> [[AND_I]]
test_vbic_u32(uint32x2_t a,uint32x2_t b)924 uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
925 return vbic_u32(a, b);
926 }
927
928 // CHECK-LABEL: @test_vbic_u64(
929 // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
930 // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
931 // CHECK: ret <1 x i64> [[AND_I]]
test_vbic_u64(uint64x1_t a,uint64x1_t b)932 uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
933 return vbic_u64(a, b);
934 }
935
936 // CHECK-LABEL: @test_vbicq_s8(
937 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
938 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
939 // CHECK: ret <16 x i8> [[AND_I]]
test_vbicq_s8(int8x16_t a,int8x16_t b)940 int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
941 return vbicq_s8(a, b);
942 }
943
944 // CHECK-LABEL: @test_vbicq_s16(
945 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
946 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
947 // CHECK: ret <8 x i16> [[AND_I]]
test_vbicq_s16(int16x8_t a,int16x8_t b)948 int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
949 return vbicq_s16(a, b);
950 }
951
952 // CHECK-LABEL: @test_vbicq_s32(
953 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
954 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
955 // CHECK: ret <4 x i32> [[AND_I]]
test_vbicq_s32(int32x4_t a,int32x4_t b)956 int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
957 return vbicq_s32(a, b);
958 }
959
960 // CHECK-LABEL: @test_vbicq_s64(
961 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
962 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
963 // CHECK: ret <2 x i64> [[AND_I]]
test_vbicq_s64(int64x2_t a,int64x2_t b)964 int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
965 return vbicq_s64(a, b);
966 }
967
968 // CHECK-LABEL: @test_vbicq_u8(
969 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
970 // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
971 // CHECK: ret <16 x i8> [[AND_I]]
test_vbicq_u8(uint8x16_t a,uint8x16_t b)972 uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
973 return vbicq_u8(a, b);
974 }
975
976 // CHECK-LABEL: @test_vbicq_u16(
977 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
978 // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
979 // CHECK: ret <8 x i16> [[AND_I]]
test_vbicq_u16(uint16x8_t a,uint16x8_t b)980 uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
981 return vbicq_u16(a, b);
982 }
983
984 // CHECK-LABEL: @test_vbicq_u32(
985 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
986 // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
987 // CHECK: ret <4 x i32> [[AND_I]]
test_vbicq_u32(uint32x4_t a,uint32x4_t b)988 uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
989 return vbicq_u32(a, b);
990 }
991
992 // CHECK-LABEL: @test_vbicq_u64(
993 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
994 // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
995 // CHECK: ret <2 x i64> [[AND_I]]
test_vbicq_u64(uint64x2_t a,uint64x2_t b)996 uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
997 return vbicq_u64(a, b);
998 }
999
1000 // CHECK-LABEL: @test_vbsl_s8(
1001 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
1002 // CHECK: ret <8 x i8> [[VBSL_V_I]]
test_vbsl_s8(uint8x8_t a,int8x8_t b,int8x8_t c)1003 int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
1004 return vbsl_s8(a, b, c);
1005 }
1006
1007 // CHECK-LABEL: @test_vbsl_s16(
1008 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1009 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1010 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
1011 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1012 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
1013 // CHECK: ret <4 x i16> [[TMP3]]
test_vbsl_s16(uint16x4_t a,int16x4_t b,int16x4_t c)1014 int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
1015 return vbsl_s16(a, b, c);
1016 }
1017
1018 // CHECK-LABEL: @test_vbsl_s32(
1019 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1020 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1021 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
1022 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1023 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
1024 // CHECK: ret <2 x i32> [[TMP3]]
test_vbsl_s32(uint32x2_t a,int32x2_t b,int32x2_t c)1025 int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
1026 return vbsl_s32(a, b, c);
1027 }
1028
1029 // CHECK-LABEL: @test_vbsl_s64(
1030 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1031 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1032 // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
1033 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1034 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
1035 // CHECK: ret <1 x i64> [[TMP3]]
test_vbsl_s64(uint64x1_t a,int64x1_t b,int64x1_t c)1036 int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
1037 return vbsl_s64(a, b, c);
1038 }
1039
1040 // CHECK-LABEL: @test_vbsl_u8(
1041 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
1042 // CHECK: ret <8 x i8> [[VBSL_V_I]]
test_vbsl_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)1043 uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
1044 return vbsl_u8(a, b, c);
1045 }
1046
1047 // CHECK-LABEL: @test_vbsl_u16(
1048 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1049 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1050 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
1051 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1052 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
1053 // CHECK: ret <4 x i16> [[TMP3]]
test_vbsl_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)1054 uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
1055 return vbsl_u16(a, b, c);
1056 }
1057
1058 // CHECK-LABEL: @test_vbsl_u32(
1059 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1060 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1061 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
1062 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1063 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
1064 // CHECK: ret <2 x i32> [[TMP3]]
test_vbsl_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)1065 uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
1066 return vbsl_u32(a, b, c);
1067 }
1068
1069 // CHECK-LABEL: @test_vbsl_u64(
1070 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1071 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1072 // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
1073 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1074 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
1075 // CHECK: ret <1 x i64> [[TMP3]]
test_vbsl_u64(uint64x1_t a,uint64x1_t b,uint64x1_t c)1076 uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
1077 return vbsl_u64(a, b, c);
1078 }
1079
1080 // CHECK-LABEL: @test_vbsl_f32(
1081 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1082 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
1083 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
1084 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1085 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float>
1086 // CHECK: ret <2 x float> [[TMP3]]
test_vbsl_f32(uint32x2_t a,float32x2_t b,float32x2_t c)1087 float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
1088 return vbsl_f32(a, b, c);
1089 }
1090
1091 // CHECK-LABEL: @test_vbsl_p8(
1092 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
1093 // CHECK: ret <8 x i8> [[VBSL_V_I]]
test_vbsl_p8(uint8x8_t a,poly8x8_t b,poly8x8_t c)1094 poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
1095 return vbsl_p8(a, b, c);
1096 }
1097
1098 // CHECK-LABEL: @test_vbsl_p16(
1099 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1100 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1101 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
1102 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1103 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
1104 // CHECK: ret <4 x i16> [[TMP3]]
test_vbsl_p16(uint16x4_t a,poly16x4_t b,poly16x4_t c)1105 poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
1106 return vbsl_p16(a, b, c);
1107 }
1108
1109 // CHECK-LABEL: @test_vbslq_s8(
1110 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
1111 // CHECK: ret <16 x i8> [[VBSLQ_V_I]]
test_vbslq_s8(uint8x16_t a,int8x16_t b,int8x16_t c)1112 int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
1113 return vbslq_s8(a, b, c);
1114 }
1115
1116 // CHECK-LABEL: @test_vbslq_s16(
1117 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1118 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1119 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
1120 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1121 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
1122 // CHECK: ret <8 x i16> [[TMP3]]
test_vbslq_s16(uint16x8_t a,int16x8_t b,int16x8_t c)1123 int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
1124 return vbslq_s16(a, b, c);
1125 }
1126
1127 // CHECK-LABEL: @test_vbslq_s32(
1128 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1129 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1130 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
1131 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1132 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
1133 // CHECK: ret <4 x i32> [[TMP3]]
test_vbslq_s32(uint32x4_t a,int32x4_t b,int32x4_t c)1134 int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
1135 return vbslq_s32(a, b, c);
1136 }
1137
1138 // CHECK-LABEL: @test_vbslq_s64(
1139 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
1140 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
1141 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
1142 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1143 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
1144 // CHECK: ret <2 x i64> [[TMP3]]
test_vbslq_s64(uint64x2_t a,int64x2_t b,int64x2_t c)1145 int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
1146 return vbslq_s64(a, b, c);
1147 }
1148
1149 // CHECK-LABEL: @test_vbslq_u8(
1150 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
1151 // CHECK: ret <16 x i8> [[VBSLQ_V_I]]
test_vbslq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)1152 uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
1153 return vbslq_u8(a, b, c);
1154 }
1155
1156 // CHECK-LABEL: @test_vbslq_u16(
1157 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1158 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1159 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
1160 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1161 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
1162 // CHECK: ret <8 x i16> [[TMP3]]
test_vbslq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)1163 uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
1164 return vbslq_u16(a, b, c);
1165 }
1166
1167 // CHECK-LABEL: @test_vbslq_u32(
1168 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1169 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1170 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
1171 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1172 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
1173 // CHECK: ret <4 x i32> [[TMP3]]
test_vbslq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)1174 uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
1175 return vbslq_u32(a, b, c);
1176 }
1177
1178 // CHECK-LABEL: @test_vbslq_u64(
1179 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
1180 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
1181 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
1182 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1183 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
1184 // CHECK: ret <2 x i64> [[TMP3]]
test_vbslq_u64(uint64x2_t a,uint64x2_t b,uint64x2_t c)1185 uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
1186 return vbslq_u64(a, b, c);
1187 }
1188
1189 // CHECK-LABEL: @test_vbslq_f32(
1190 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1191 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
1192 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
1193 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1194 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float>
1195 // CHECK: ret <4 x float> [[TMP3]]
test_vbslq_f32(uint32x4_t a,float32x4_t b,float32x4_t c)1196 float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
1197 return vbslq_f32(a, b, c);
1198 }
1199
1200 // CHECK-LABEL: @test_vbslq_p8(
1201 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
1202 // CHECK: ret <16 x i8> [[VBSLQ_V_I]]
test_vbslq_p8(uint8x16_t a,poly8x16_t b,poly8x16_t c)1203 poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
1204 return vbslq_p8(a, b, c);
1205 }
1206
1207 // CHECK-LABEL: @test_vbslq_p16(
1208 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1209 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1210 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
1211 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1212 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
1213 // CHECK: ret <8 x i16> [[TMP3]]
test_vbslq_p16(uint16x8_t a,poly16x8_t b,poly16x8_t c)1214 poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
1215 return vbslq_p16(a, b, c);
1216 }
1217
1218 // CHECK-LABEL: @test_vcage_f32(
1219 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1220 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
1221 // CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
1222 // CHECK: ret <2 x i32> [[VCAGE_V2_I]]
test_vcage_f32(float32x2_t a,float32x2_t b)1223 uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
1224 return vcage_f32(a, b);
1225 }
1226
1227 // CHECK-LABEL: @test_vcageq_f32(
1228 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1229 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
1230 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
1231 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
test_vcageq_f32(float32x4_t a,float32x4_t b)1232 uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
1233 return vcageq_f32(a, b);
1234 }
1235
1236 // CHECK-LABEL: @test_vcagt_f32(
1237 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1238 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
1239 // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
1240 // CHECK: ret <2 x i32> [[VCAGT_V2_I]]
test_vcagt_f32(float32x2_t a,float32x2_t b)1241 uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
1242 return vcagt_f32(a, b);
1243 }
1244
1245 // CHECK-LABEL: @test_vcagtq_f32(
1246 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1247 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
1248 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
1249 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
test_vcagtq_f32(float32x4_t a,float32x4_t b)1250 uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
1251 return vcagtq_f32(a, b);
1252 }
1253
1254 // CHECK-LABEL: @test_vcale_f32(
1255 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1256 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
1257 // CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
1258 // CHECK: ret <2 x i32> [[VCALE_V2_I]]
test_vcale_f32(float32x2_t a,float32x2_t b)1259 uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
1260 return vcale_f32(a, b);
1261 }
1262
1263 // CHECK-LABEL: @test_vcaleq_f32(
1264 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1265 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
1266 // CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
1267 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
test_vcaleq_f32(float32x4_t a,float32x4_t b)1268 uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
1269 return vcaleq_f32(a, b);
1270 }
1271
1272 // CHECK-LABEL: @test_vcalt_f32(
1273 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1274 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
1275 // CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
1276 // CHECK: ret <2 x i32> [[VCALT_V2_I]]
test_vcalt_f32(float32x2_t a,float32x2_t b)1277 uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
1278 return vcalt_f32(a, b);
1279 }
1280
1281 // CHECK-LABEL: @test_vcaltq_f32(
1282 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1283 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
1284 // CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
1285 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
test_vcaltq_f32(float32x4_t a,float32x4_t b)1286 uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
1287 return vcaltq_f32(a, b);
1288 }
1289
// Compare-equal tests (vceq/vceqq): each intrinsic lowers to an icmp/fcmp "eq"
// on the input vectors followed by a sign-extension of the <N x i1> mask back
// to the element width, producing the all-ones/all-zeroes NEON result.
// The "// CHECK" comments are FileCheck directives matched against the
// generated IR; do not edit them by hand.
// CHECK-LABEL: @test_vceq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
  return vceq_s8(a, b);
}

// CHECK-LABEL: @test_vceq_s16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
  return vceq_s16(a, b);
}

// CHECK-LABEL: @test_vceq_s32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
  return vceq_s32(a, b);
}

// CHECK-LABEL: @test_vceq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
  return vceq_f32(a, b);
}

// CHECK-LABEL: @test_vceq_u8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
  return vceq_u8(a, b);
}

// CHECK-LABEL: @test_vceq_u16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
  return vceq_u16(a, b);
}

// CHECK-LABEL: @test_vceq_u32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
  return vceq_u32(a, b);
}

// CHECK-LABEL: @test_vceq_p8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
  return vceq_p8(a, b);
}

// CHECK-LABEL: @test_vceqq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
  return vceqq_s8(a, b);
}

// CHECK-LABEL: @test_vceqq_s16(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
  return vceqq_s16(a, b);
}

// CHECK-LABEL: @test_vceqq_s32(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
  return vceqq_s32(a, b);
}

// CHECK-LABEL: @test_vceqq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
  return vceqq_f32(a, b);
}

// CHECK-LABEL: @test_vceqq_u8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
  return vceqq_u8(a, b);
}

// CHECK-LABEL: @test_vceqq_u16(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
  return vceqq_u16(a, b);
}

// CHECK-LABEL: @test_vceqq_u32(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
  return vceqq_u32(a, b);
}

// CHECK-LABEL: @test_vceqq_p8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
  return vceqq_p8(a, b);
}
1417
// Compare greater-than-or-equal tests (vcge/vcgeq): signed variants lower to
// "icmp sge", unsigned to "icmp uge", float to "fcmp oge", each followed by a
// sign-extension of the i1 mask to the element width. CHECK lines are
// FileCheck directives matched against the generated IR.
// CHECK-LABEL: @test_vcge_s8(
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
  return vcge_s8(a, b);
}

// CHECK-LABEL: @test_vcge_s16(
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
  return vcge_s16(a, b);
}

// CHECK-LABEL: @test_vcge_s32(
// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
  return vcge_s32(a, b);
}

// CHECK-LABEL: @test_vcge_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
  return vcge_f32(a, b);
}

// CHECK-LABEL: @test_vcge_u8(
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
  return vcge_u8(a, b);
}

// CHECK-LABEL: @test_vcge_u16(
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
  return vcge_u16(a, b);
}

// CHECK-LABEL: @test_vcge_u32(
// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
  return vcge_u32(a, b);
}

// CHECK-LABEL: @test_vcgeq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
  return vcgeq_s8(a, b);
}

// CHECK-LABEL: @test_vcgeq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
  return vcgeq_s16(a, b);
}

// CHECK-LABEL: @test_vcgeq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
  return vcgeq_s32(a, b);
}

// CHECK-LABEL: @test_vcgeq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
  return vcgeq_f32(a, b);
}

// CHECK-LABEL: @test_vcgeq_u8(
// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgeq_u8(a, b);
}

// CHECK-LABEL: @test_vcgeq_u16(
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgeq_u16(a, b);
}

// CHECK-LABEL: @test_vcgeq_u32(
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgeq_u32(a, b);
}
1529
// Compare greater-than tests (vcgt/vcgtq): "icmp sgt" (signed), "icmp ugt"
// (unsigned) or "fcmp ogt" (float), then sext of the i1 mask to element width.
// CHECK lines are FileCheck directives matched against the generated IR.
// CHECK-LABEL: @test_vcgt_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
  return vcgt_s8(a, b);
}

// CHECK-LABEL: @test_vcgt_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
  return vcgt_s16(a, b);
}

// CHECK-LABEL: @test_vcgt_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
  return vcgt_s32(a, b);
}

// CHECK-LABEL: @test_vcgt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
  return vcgt_f32(a, b);
}

// CHECK-LABEL: @test_vcgt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
  return vcgt_u8(a, b);
}

// CHECK-LABEL: @test_vcgt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
  return vcgt_u16(a, b);
}

// CHECK-LABEL: @test_vcgt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
  return vcgt_u32(a, b);
}

// CHECK-LABEL: @test_vcgtq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
  return vcgtq_s8(a, b);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
  return vcgtq_s16(a, b);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
  return vcgtq_s32(a, b);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
  return vcgtq_f32(a, b);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgtq_u8(a, b);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgtq_u16(a, b);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgtq_u32(a, b);
}
1641
// Compare less-than-or-equal tests (vcle/vcleq): "icmp sle"/"icmp ule"/
// "fcmp ole" followed by sext of the i1 mask to the element width.
// CHECK lines are FileCheck directives matched against the generated IR.
// CHECK-LABEL: @test_vcle_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
  return vcle_s8(a, b);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
  return vcle_s16(a, b);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
  return vcle_s32(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
  return vcle_f32(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
  return vcle_u8(a, b);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
  return vcle_u16(a, b);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
  return vcle_u32(a, b);
}

// CHECK-LABEL: @test_vcleq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
  return vcleq_s8(a, b);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
  return vcleq_s16(a, b);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
  return vcleq_s32(a, b);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
  return vcleq_f32(a, b);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
  return vcleq_u8(a, b);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
  return vcleq_u16(a, b);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
  return vcleq_u32(a, b);
}
1753
// Count-leading-sign-bits tests (vcls/vclsq): all variants (including the
// _u forms) lower to the target intrinsic @llvm.arm.neon.vcls. For 16/32-bit
// elements the builtin wrapper emits <8 x i8>/<16 x i8> bitcasts; the trailing
// bitcast is matched but the ret uses the intrinsic result directly.
// CHECK lines are FileCheck directives matched against the generated IR.
// CHECK-LABEL: @test_vcls_s8(
// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_s8(int8x8_t a) {
  return vcls_s8(a);
}

// CHECK-LABEL: @test_vcls_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLS_V1_I]]
int16x4_t test_vcls_s16(int16x4_t a) {
  return vcls_s16(a);
}

// CHECK-LABEL: @test_vcls_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLS_V1_I]]
int32x2_t test_vcls_s32(int32x2_t a) {
  return vcls_s32(a);
}

// CHECK-LABEL: @test_vcls_u8(
// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_u8(uint8x8_t a) {
  return vcls_u8(a);
}

// CHECK-LABEL: @test_vcls_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLS_V1_I]]
int16x4_t test_vcls_u16(uint16x4_t a) {
  return vcls_u16(a);
}

// CHECK-LABEL: @test_vcls_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLS_V1_I]]
int32x2_t test_vcls_u32(uint32x2_t a) {
  return vcls_u32(a);
}

// CHECK-LABEL: @test_vclsq_s8(
// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_s8(int8x16_t a) {
  return vclsq_s8(a);
}

// CHECK-LABEL: @test_vclsq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
int16x8_t test_vclsq_s16(int16x8_t a) {
  return vclsq_s16(a);
}

// CHECK-LABEL: @test_vclsq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
int32x4_t test_vclsq_s32(int32x4_t a) {
  return vclsq_s32(a);
}

// CHECK-LABEL: @test_vclsq_u8(
// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_u8(uint8x16_t a) {
  return vclsq_u8(a);
}

// CHECK-LABEL: @test_vclsq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
int16x8_t test_vclsq_u16(uint16x8_t a) {
  return vclsq_u16(a);
}

// CHECK-LABEL: @test_vclsq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
int32x4_t test_vclsq_u32(uint32x4_t a) {
  return vclsq_u32(a);
}
1853
// Compare less-than tests (vclt/vcltq): "icmp slt"/"icmp ult"/"fcmp olt"
// followed by sext of the i1 mask to the element width.
// CHECK lines are FileCheck directives matched against the generated IR.
// CHECK-LABEL: @test_vclt_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
  return vclt_s8(a, b);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
  return vclt_s16(a, b);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
  return vclt_s32(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
  return vclt_f32(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
  return vclt_u8(a, b);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
  return vclt_u16(a, b);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
  return vclt_u32(a, b);
}

// CHECK-LABEL: @test_vcltq_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
  return vcltq_s8(a, b);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
  return vcltq_s16(a, b);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
  return vcltq_s32(a, b);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
  return vcltq_f32(a, b);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
  return vcltq_u8(a, b);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
  return vcltq_u16(a, b);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
  return vcltq_u32(a, b);
}
1965
// Count-leading-zeros tests (vclz/vclzq): all variants lower to the generic
// @llvm.ctlz intrinsic with is_zero_poison = false (i1 false), so a zero
// input is well-defined. 16/32-bit element variants include the builtin
// wrapper's <8 x i8>/<16 x i8> bitcasts.
// CHECK lines are FileCheck directives matched against the generated IR.
// CHECK-LABEL: @test_vclz_s8(
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vclz_s8(int8x8_t a) {
  return vclz_s8(a);
}

// CHECK-LABEL: @test_vclz_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vclz_s16(int16x4_t a) {
  return vclz_s16(a);
}

// CHECK-LABEL: @test_vclz_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vclz_s32(int32x2_t a) {
  return vclz_s32(a);
}

// CHECK-LABEL: @test_vclz_u8(
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vclz_u8(uint8x8_t a) {
  return vclz_u8(a);
}

// CHECK-LABEL: @test_vclz_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
uint16x4_t test_vclz_u16(uint16x4_t a) {
  return vclz_u16(a);
}

// CHECK-LABEL: @test_vclz_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
uint32x2_t test_vclz_u32(uint32x2_t a) {
  return vclz_u32(a);
}

// CHECK-LABEL: @test_vclzq_s8(
// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
int8x16_t test_vclzq_s8(int8x16_t a) {
  return vclzq_s8(a);
}

// CHECK-LABEL: @test_vclzq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
int16x8_t test_vclzq_s16(int16x8_t a) {
  return vclzq_s16(a);
}

// CHECK-LABEL: @test_vclzq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
int32x4_t test_vclzq_s32(int32x4_t a) {
  return vclzq_s32(a);
}

// CHECK-LABEL: @test_vclzq_u8(
// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
  return vclzq_u8(a);
}

// CHECK-LABEL: @test_vclzq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
  return vclzq_u16(a);
}

// CHECK-LABEL: @test_vclzq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
  return vclzq_u32(a);
}
2065
// vcnt_*/vcntq_* (population count per byte lane) should lower to the generic
// llvm.ctpop.* intrinsic; signed, unsigned, and polynomial byte vectors all
// share the identical lowering since popcount ignores element interpretation.
// CHECK-LABEL: @test_vcnt_u8(
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
  return vcnt_u8(a);
}

// CHECK-LABEL: @test_vcnt_s8(
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
  return vcnt_s8(a);
}

// CHECK-LABEL: @test_vcnt_p8(
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
  return vcnt_p8(a);
}

// Quad-register (128-bit) variants of the same popcount lowering.
// CHECK-LABEL: @test_vcntq_u8(
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
  return vcntq_u8(a);
}

// CHECK-LABEL: @test_vcntq_s8(
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
  return vcntq_s8(a);
}

// CHECK-LABEL: @test_vcntq_p8(
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
  return vcntq_p8(a);
}
2107
// vcombine_* (concatenate two 64-bit vectors into one 128-bit vector) should
// lower to a single shufflevector whose mask selects all lanes of %a followed
// by all lanes of %b, with no intermediate bitcasts or intrinsic calls.
// CHECK-LABEL: @test_vcombine_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
  return vcombine_s8(a, b);
}

// CHECK-LABEL: @test_vcombine_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
  return vcombine_s16(a, b);
}

// CHECK-LABEL: @test_vcombine_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
  return vcombine_s32(a, b);
}

// CHECK-LABEL: @test_vcombine_s64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
  return vcombine_s64(a, b);
}

// Half-precision variant; requires the +fullfp16 /
// -fallow-half-arguments-and-returns flags from the RUN line.
// CHECK-LABEL: @test_vcombine_f16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
  return vcombine_f16(a, b);
}

// CHECK-LABEL: @test_vcombine_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
  return vcombine_f32(a, b);
}

// CHECK-LABEL: @test_vcombine_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
  return vcombine_u8(a, b);
}

// CHECK-LABEL: @test_vcombine_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
  return vcombine_u16(a, b);
}

// CHECK-LABEL: @test_vcombine_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
  return vcombine_u32(a, b);
}

// CHECK-LABEL: @test_vcombine_u64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
  return vcombine_u64(a, b);
}

// CHECK-LABEL: @test_vcombine_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
  return vcombine_p8(a, b);
}

// CHECK-LABEL: @test_vcombine_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
  return vcombine_p16(a, b);
}
2191
// vcreate_* (build a 64-bit vector from a scalar i64) should lower to a plain
// bitcast. Most tests feed the created vector through a second intrinsic
// (vclz/vcnt/vadd/vbsl) so that mem2reg leaves non-trivial IR to match;
// otherwise the whole function would fold to a single bitcast+ret.
// CHECK-LABEL: @test_vcreate_s8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
  return vclz_s8(vcreate_s8(a));
}

// A constant argument must still go through the same bitcast lowering.
// CHECK-LABEL: @test_vcreate_imm
// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16>
// CHECK: ret <4 x i16> [[RES]]
int16x4_t test_vcreate_imm(void) {
  return vcreate_s16(0);
}

// CHECK-LABEL: @test_vcreate_s16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_s16(uint64_t a) {
  return vclz_s16(vcreate_s16(a));
}

// CHECK-LABEL: @test_vcreate_s32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_s32(uint64_t a) {
  return vclz_s32(vcreate_s32(a));
}

// CHECK-LABEL: @test_vcreate_f16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
  return vcreate_f16(a);
}

// CHECK-LABEL: @test_vcreate_f32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
  return vcreate_f32(a);
}

// The u8/u16/u32 variants deliberately cast the unsigned vcreate result to the
// signed vector type so it can be passed to vclz_s*; the explicit casts are
// required because the test compiles with -flax-vector-conversions=none.
// CHECK-LABEL: @test_vcreate_u8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_u8(uint64_t a) {
  return vclz_s8((int8x8_t)vcreate_u8(a));
}

// CHECK-LABEL: @test_vcreate_u16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_u16(uint64_t a) {
  return vclz_s16((int16x4_t)vcreate_u16(a));
}

// CHECK-LABEL: @test_vcreate_u32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_u32(uint64_t a) {
  return vclz_s32((int32x2_t)vcreate_u32(a));
}

// 64-bit element variants use vadd (there is no single-lane vclz) to keep a
// use of the created vector in the output IR.
// CHECK-LABEL: @test_vcreate_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vcreate_u64(uint64_t a) {
  uint64x1_t tmp = vcreate_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_p8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]])
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcreate_p8(uint64_t a) {
  return vcnt_p8(vcreate_p8(a));
}

// CHECK-LABEL: @test_vcreate_p16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP4]]
poly16x4_t test_vcreate_p16(uint64_t a) {
  poly16x4_t tmp = vcreate_p16(a);
  return vbsl_p16((uint16x4_t)tmp, tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vcreate_s64(uint64_t a) {
  int64x1_t tmp = vcreate_s64(a);
  return vadd_s64(tmp, tmp);
}
2307
// vcvt_* conversion tests. Expected lowerings:
//   - int <-> float: plain sitofp/uitofp/fptosi/fptoui instructions.
//   - fixed-point (vcvt_n_*): llvm.arm.neon.vcvtfx* intrinsics, carrying the
//     fractional-bit count as the trailing immediate operand.
//   - f16 <-> f32: llvm.arm.neon.vcvtfp2hf / vcvthf2fp intrinsics.
// CHECK-LABEL: @test_vcvt_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a)
// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}

// CHECK-LABEL: @test_vcvt_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
  return vcvt_f32_s32(a);
}

// CHECK-LABEL: @test_vcvt_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
  return vcvt_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
  return vcvtq_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
  return vcvtq_f32_u32(a);
}

// CHECK-LABEL: @test_vcvt_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VCVT_F32_F161_I]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}

// Fixed-point conversions: the immediate (number of fractional bits) must be
// forwarded verbatim as the last operand of the vcvtfx* intrinsic.
// CHECK-LABEL: @test_vcvt_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvt_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 3);
}

// CHECK-LABEL: @test_vcvtq_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 3);
}

// CHECK-LABEL: @test_vcvt_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
  return vcvt_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
  return vcvtq_s32_f32(a);
}

// CHECK-LABEL: @test_vcvt_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
  return vcvt_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
  return vcvtq_u32_f32(a);
}
2463
// vdup_lane_* / vdupq_lane_* (broadcast one lane of a 64-bit vector) should
// lower to a shufflevector whose mask repeats the selected lane index; the
// quad (q) variants widen to twice the element count. Non-i8 element types
// additionally show a bitcast round-trip through <8 x i8> in the -O0 output.
// CHECK-LABEL: @test_vdup_lane_u8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
  return vdup_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
  return vdup_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
  return vdup_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_s8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
  return vdup_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
  return vdup_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
  return vdup_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_p8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
  return vdup_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
  return vdup_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
  return vdup_lane_f32(a, 1);
}

// Quad-register variants: same lane-splat shuffle, doubled result width.
// CHECK-LABEL: @test_vdupq_lane_u8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
  return vdupq_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
  return vdupq_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
  return vdupq_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_s8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
  return vdupq_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
  return vdupq_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
  return vdupq_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_p8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
  return vdupq_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
  return vdupq_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
  return vdupq_lane_f32(a, 1);
}

// 64-bit element types only have lane 0, so the splat mask degenerates to
// zeroinitializer.
// CHECK-LABEL: @test_vdup_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
  return vdup_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdup_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
  return vdup_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
  return vdupq_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
  return vdupq_lane_u64(a, 0);
}
2649
// vdup_n_* (broadcast a scalar to every lane) should lower at -O0 (with only
// mem2reg run) to a chain of insertelement instructions starting from undef,
// one per lane, rather than a shufflevector splat.
// CHECK-LABEL: @test_vdup_n_u8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vdup_n_u8(uint8_t a) {
  return vdup_n_u8(a);
}

// CHECK-LABEL: @test_vdup_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vdup_n_u16(uint16_t a) {
  return vdup_n_u16(a);
}

// CHECK-LABEL: @test_vdup_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vdup_n_u32(uint32_t a) {
  return vdup_n_u32(a);
}

// CHECK-LABEL: @test_vdup_n_s8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vdup_n_s8(int8_t a) {
  return vdup_n_s8(a);
}

// CHECK-LABEL: @test_vdup_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vdup_n_s16(int16_t a) {
  return vdup_n_s16(a);
}

// CHECK-LABEL: @test_vdup_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vdup_n_s32(int32_t a) {
  return vdup_n_s32(a);
}

// CHECK-LABEL: @test_vdup_n_p8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vdup_n_p8(poly8_t a) {
  return vdup_n_p8(a);
}
2727
2728 // CHECK-LABEL: @test_vdup_n_p16(
2729 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
2730 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
2731 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
2732 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
2733 // CHECK: ret <4 x i16> [[VECINIT3_I]]
test_vdup_n_p16(poly16_t a)2734 poly16x4_t test_vdup_n_p16(poly16_t a) {
2735 return vdup_n_p16(a);
2736 }
2737
2738 // CHECK-LABEL: @test_vdup_n_f16(
2739 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
2740 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
2741 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
2742 // CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
2743 // CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
2744 // CHECK: ret <4 x half> [[VECINIT3]]
test_vdup_n_f16(float16_t * a)2745 float16x4_t test_vdup_n_f16(float16_t *a) {
2746 return vdup_n_f16(*a);
2747 }
2748
2749 // CHECK-LABEL: @test_vdup_n_f32(
2750 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
2751 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
2752 // CHECK: ret <2 x float> [[VECINIT1_I]]
test_vdup_n_f32(float32_t a)2753 float32x2_t test_vdup_n_f32(float32_t a) {
2754 return vdup_n_f32(a);
2755 }
2756
// vdupq_n_*: same scalar-broadcast contract as vdup_n_*, but targeting a
// 128-bit (Q-register) vector, so the insertelement chain covers twice as
// many lanes.

// CHECK-LABEL: @test_vdupq_n_u8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vdupq_n_u8(uint8_t a) {
  return vdupq_n_u8(a);
}

// CHECK-LABEL: @test_vdupq_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vdupq_n_u16(uint16_t a) {
  return vdupq_n_u16(a);
}

// CHECK-LABEL: @test_vdupq_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vdupq_n_u32(uint32_t a) {
  return vdupq_n_u32(a);
}

// CHECK-LABEL: @test_vdupq_n_s8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vdupq_n_s8(int8_t a) {
  return vdupq_n_s8(a);
}

// CHECK-LABEL: @test_vdupq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vdupq_n_s16(int16_t a) {
  return vdupq_n_s16(a);
}

// CHECK-LABEL: @test_vdupq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vdupq_n_s32(int32_t a) {
  return vdupq_n_s32(a);
}

// Polynomial-typed variants lower identically to the integer ones.
// CHECK-LABEL: @test_vdupq_n_p8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vdupq_n_p8(poly8_t a) {
  return vdupq_n_p8(a);
}

// CHECK-LABEL: @test_vdupq_n_p16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vdupq_n_p16(poly16_t a) {
  return vdupq_n_p16(a);
}

// NOTE(review): as in the D-register f16 test, the half operand comes in by
// pointer, so the expected IR begins with a half load feeding the splat.
// CHECK-LABEL: @test_vdupq_n_f16(
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vdupq_n_f16(float16_t *a) {
  return vdupq_n_f16(*a);
}

// CHECK-LABEL: @test_vdupq_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vdupq_n_f32(float32_t a) {
  return vdupq_n_f32(a);
}
2909
// 64-bit-element splats. Unlike the narrower variants above, these tests feed
// the dup result through a vadd of the value with itself, so the CHECK
// patterns cover the splat being materialized and consumed (the lone
// <1 x i64> case needs only a single insertelement).

// CHECK-LABEL: @test_vdup_n_s64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_s64(int64_t a) {
  int64x1_t tmp = vdup_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// NOTE(review): the unsigned splat is cast to the signed vector type before
// the signed add; the generated IR is identical to the s64 case.
// CHECK-LABEL: @test_vdup_n_u64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_u64(uint64_t a) {
  int64x1_t tmp = (int64x1_t)vdup_n_u64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_s64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vdupq_n_s64(int64_t a) {
  int64x2_t tmp = vdupq_n_s64(a);
  return vaddq_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_u64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vdupq_n_u64(uint64_t a) {
  uint64x2_t tmp = vdupq_n_u64(a);
  return vaddq_u64(tmp, tmp);
}
2947
// veor / veorq: bitwise exclusive-OR. Expected to lower to a single plain IR
// `xor` on the corresponding vector type for every element type and both
// register widths — no target intrinsic involved.

// CHECK-LABEL: @test_veor_s8(
// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[XOR_I]]
int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
  return veor_s8(a, b);
}

// CHECK-LABEL: @test_veor_s16(
// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[XOR_I]]
int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
  return veor_s16(a, b);
}

// CHECK-LABEL: @test_veor_s32(
// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[XOR_I]]
int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
  return veor_s32(a, b);
}

// CHECK-LABEL: @test_veor_s64(
// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[XOR_I]]
int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
  return veor_s64(a, b);
}

// CHECK-LABEL: @test_veor_u8(
// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[XOR_I]]
uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
  return veor_u8(a, b);
}

// CHECK-LABEL: @test_veor_u16(
// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[XOR_I]]
uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
  return veor_u16(a, b);
}

// CHECK-LABEL: @test_veor_u32(
// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[XOR_I]]
uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
  return veor_u32(a, b);
}

// CHECK-LABEL: @test_veor_u64(
// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[XOR_I]]
uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
  return veor_u64(a, b);
}

// 128-bit (Q-register) forms.
// CHECK-LABEL: @test_veorq_s8(
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
  return veorq_s8(a, b);
}

// CHECK-LABEL: @test_veorq_s16(
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
  return veorq_s16(a, b);
}

// CHECK-LABEL: @test_veorq_s32(
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
  return veorq_s32(a, b);
}

// CHECK-LABEL: @test_veorq_s64(
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
  return veorq_s64(a, b);
}

// CHECK-LABEL: @test_veorq_u8(
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
  return veorq_u8(a, b);
}

// CHECK-LABEL: @test_veorq_u16(
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
  return veorq_u16(a, b);
}

// CHECK-LABEL: @test_veorq_u32(
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
  return veorq_u32(a, b);
}

// CHECK-LABEL: @test_veorq_u64(
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
  return veorq_u64(a, b);
}
3059
// vext / vextq: extract a vector from a pair of vectors, starting at lane
// <n> of the first operand. Expected to lower to an IR shufflevector whose
// mask starts at the immediate index and runs into the second operand's
// lanes. Each test uses the maximum legal immediate for its lane count.
// For non-i8 element types the operands are round-tripped through <N x i8>
// bitcasts first (the generic NEON argument-passing pattern at -O0).

// CHECK-LABEL: @test_vext_s8(
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
  return vext_s8(a, b, 7);
}

// CHECK-LABEL: @test_vext_u8(
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
  return vext_u8(a, b, 7);
}

// CHECK-LABEL: @test_vext_p8(
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
  return vext_p8(a, b, 7);
}

// CHECK-LABEL: @test_vext_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
  return vext_s16(a, b, 3);
}

// CHECK-LABEL: @test_vext_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
  return vext_u16(a, b, 3);
}

// CHECK-LABEL: @test_vext_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
  return vext_p16(a, b, 3);
}

// CHECK-LABEL: @test_vext_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
  return vext_s32(a, b, 1);
}

// CHECK-LABEL: @test_vext_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
  return vext_u32(a, b, 1);
}

// Single-element vectors: index 0 is the only legal immediate, so the
// shuffle mask is zeroinitializer (an identity pick of the first operand).
// CHECK-LABEL: @test_vext_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
  return vext_s64(a, b, 0);
}

// CHECK-LABEL: @test_vext_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
  return vext_u64(a, b, 0);
}

// CHECK-LABEL: @test_vext_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x float> [[VEXT]]
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
  return vext_f32(a, b, 1);
}

// 128-bit (Q-register) forms.
// CHECK-LABEL: @test_vextq_s8(
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
  return vextq_s8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_u8(
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
  return vextq_u8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_p8(
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
  return vextq_p8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
  return vextq_s16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
  return vextq_u16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
  return vextq_p16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
  return vextq_s32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
  return vextq_u32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
  return vextq_s64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
  return vextq_u64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x float> [[VEXT]]
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
  return vextq_f32(a, b, 3);
}
3277
// vfma / vfms: fused multiply-accumulate a + (b * c). Expected to lower to
// the generic @llvm.fma intrinsic with the accumulator as the third operand;
// the vfms variants additionally negate b first (checked as an IR fneg),
// giving a - (b * c) in one fused operation.

// CHECK-LABEL: @test_vfma_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfma_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmaq_f32(a, b, c);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfms_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %b
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmsq_f32(a, b, c);
}
3319
// Section: vget_high_* — take the upper half of a 128-bit vector; all lower
// to a shufflevector selecting the high lanes (generated tests — CHECK lines
// pin exact IR, do not hand-edit).
// CHECK-LABEL: @test_vget_high_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_high_s8(int8x16_t a) {
  return vget_high_s8(a);
}

// CHECK-LABEL: @test_vget_high_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_high_s16(int16x8_t a) {
  return vget_high_s16(a);
}

// CHECK-LABEL: @test_vget_high_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_high_s32(int32x4_t a) {
  return vget_high_s32(a);
}

// CHECK-LABEL: @test_vget_high_s64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_high_s64(int64x2_t a) {
  return vget_high_s64(a);
}

// CHECK-LABEL: @test_vget_high_f16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_high_f16(float16x8_t a) {
  return vget_high_f16(a);
}

// CHECK-LABEL: @test_vget_high_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_high_f32(float32x4_t a) {
  return vget_high_f32(a);
}

// CHECK-LABEL: @test_vget_high_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_high_u8(uint8x16_t a) {
  return vget_high_u8(a);
}

// CHECK-LABEL: @test_vget_high_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_high_u16(uint16x8_t a) {
  return vget_high_u16(a);
}

// CHECK-LABEL: @test_vget_high_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_high_u32(uint32x4_t a) {
  return vget_high_u32(a);
}

// CHECK-LABEL: @test_vget_high_u64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_high_u64(uint64x2_t a) {
  return vget_high_u64(a);
}

// CHECK-LABEL: @test_vget_high_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_high_p8(poly8x16_t a) {
  return vget_high_p8(a);
}

// CHECK-LABEL: @test_vget_high_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_high_p16(poly16x8_t a) {
  return vget_high_p16(a);
}
3403
// Section: vget_lane_* / vgetq_lane_* — extract a single scalar lane; lowers
// to extractelement. The f16 variants go through stack temporaries because the
// lane is reinterpreted as i16 then widened to float (generated tests — CHECK
// lines pin exact IR, do not hand-edit).
// CHECK-LABEL: @test_vget_lane_u8(
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_u16(
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_u32(
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_s8(
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_s16(
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_s32(
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_p8(
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_p16(
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_f32(
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_f16(
// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

// CHECK-LABEL: @test_vgetq_lane_u8(
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_u16(
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_u32(
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_s8(
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_s16(
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_s32(
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_p8(
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_p16(
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_f32(
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK: ret float [[VGET_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_f16(
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_s64(
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vget_lane_u64(
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vgetq_lane_s64(
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: @test_vgetq_lane_u64(
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}
3589
// Section: vget_low_* — take the lower half of a 128-bit vector; all lower to
// a shufflevector selecting the low lanes (generated tests — CHECK lines pin
// exact IR, do not hand-edit).
// CHECK-LABEL: @test_vget_low_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_low_s8(int8x16_t a) {
  return vget_low_s8(a);
}

// CHECK-LABEL: @test_vget_low_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_low_s16(int16x8_t a) {
  return vget_low_s16(a);
}

// CHECK-LABEL: @test_vget_low_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_low_s32(int32x4_t a) {
  return vget_low_s32(a);
}

// CHECK-LABEL: @test_vget_low_s64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_low_s64(int64x2_t a) {
  return vget_low_s64(a);
}

// CHECK-LABEL: @test_vget_low_f16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_low_f16(float16x8_t a) {
  return vget_low_f16(a);
}

// CHECK-LABEL: @test_vget_low_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_low_f32(float32x4_t a) {
  return vget_low_f32(a);
}

// CHECK-LABEL: @test_vget_low_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_low_u8(uint8x16_t a) {
  return vget_low_u8(a);
}

// CHECK-LABEL: @test_vget_low_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_low_u16(uint16x8_t a) {
  return vget_low_u16(a);
}

// CHECK-LABEL: @test_vget_low_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_low_u32(uint32x4_t a) {
  return vget_low_u32(a);
}

// CHECK-LABEL: @test_vget_low_u64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_low_u64(uint64x2_t a) {
  return vget_low_u64(a);
}

// CHECK-LABEL: @test_vget_low_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_low_p8(poly8x16_t a) {
  return vget_low_p8(a);
}

// CHECK-LABEL: @test_vget_low_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_low_p16(poly16x8_t a) {
  return vget_low_p16(a);
}
3673
// Section: vhadd/vhaddq — halving add; lowers to the llvm.arm.neon.vhadds
// (signed) / vhaddu (unsigned) intrinsics (generated tests — CHECK lines pin
// exact IR, do not hand-edit).
// CHECK-LABEL: @test_vhadd_s8(
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
  return vhadd_s8(a, b);
}

// CHECK-LABEL: @test_vhadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
  return vhadd_s16(a, b);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
  return vhadd_s32(a, b);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vhadd_u16(a, b);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vhadd_u32(a, b);
}

// CHECK-LABEL: @test_vhaddq_s8(
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
  return vhaddq_s8(a, b);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
  return vhaddq_s16(a, b);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
  return vhaddq_s32(a, b);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vhaddq_u8(a, b);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vhaddq_u16(a, b);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vhaddq_u32(a, b);
}
3781
// Section: vhsub/vhsubq — halving subtract; lowers to the llvm.arm.neon.vhsubs
// (signed) / vhsubu (unsigned) intrinsics (generated tests — CHECK lines pin
// exact IR, do not hand-edit).
// CHECK-LABEL: @test_vhsub_s8(
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
  return vhsub_s8(a, b);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
  return vhsub_s16(a, b);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
  return vhsub_s32(a, b);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
  return vhsub_u8(a, b);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
  return vhsub_u16(a, b);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
  return vhsub_u32(a, b);
}

// CHECK-LABEL: @test_vhsubq_s8(
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
  return vhsubq_s8(a, b);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
  return vhsubq_s16(a, b);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
  return vhsubq_s32(a, b);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vhsubq_u8(a, b);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vhsubq_u16(a, b);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vhsubq_u32(a, b);
}
3889
3890 // CHECK-LABEL: @test_vld1q_u8(
3891 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
3892 // CHECK: ret <16 x i8> [[VLD1]]
test_vld1q_u8(uint8_t const * a)3893 uint8x16_t test_vld1q_u8(uint8_t const * a) {
3894 return vld1q_u8(a);
3895 }
3896
3897 // CHECK-LABEL: @test_vld1q_u16(
3898 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3899 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
3900 // CHECK: ret <8 x i16> [[VLD1]]
test_vld1q_u16(uint16_t const * a)3901 uint16x8_t test_vld1q_u16(uint16_t const * a) {
3902 return vld1q_u16(a);
3903 }
3904
3905 // CHECK-LABEL: @test_vld1q_u32(
3906 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
3907 // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
3908 // CHECK: ret <4 x i32> [[VLD1]]
test_vld1q_u32(uint32_t const * a)3909 uint32x4_t test_vld1q_u32(uint32_t const * a) {
3910 return vld1q_u32(a);
3911 }
3912
3913 // CHECK-LABEL: @test_vld1q_u64(
3914 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
3915 // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
3916 // CHECK: ret <2 x i64> [[VLD1]]
test_vld1q_u64(uint64_t const * a)3917 uint64x2_t test_vld1q_u64(uint64_t const * a) {
3918 return vld1q_u64(a);
3919 }
3920
3921 // CHECK-LABEL: @test_vld1q_s8(
3922 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
3923 // CHECK: ret <16 x i8> [[VLD1]]
test_vld1q_s8(int8_t const * a)3924 int8x16_t test_vld1q_s8(int8_t const * a) {
3925 return vld1q_s8(a);
3926 }
3927
3928 // CHECK-LABEL: @test_vld1q_s16(
3929 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3930 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
3931 // CHECK: ret <8 x i16> [[VLD1]]
test_vld1q_s16(int16_t const * a)3932 int16x8_t test_vld1q_s16(int16_t const * a) {
3933 return vld1q_s16(a);
3934 }
3935
3936 // CHECK-LABEL: @test_vld1q_s32(
3937 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
3938 // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
3939 // CHECK: ret <4 x i32> [[VLD1]]
test_vld1q_s32(int32_t const * a)3940 int32x4_t test_vld1q_s32(int32_t const * a) {
3941 return vld1q_s32(a);
3942 }
3943
3944 // CHECK-LABEL: @test_vld1q_s64(
3945 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
3946 // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
3947 // CHECK: ret <2 x i64> [[VLD1]]
test_vld1q_s64(int64_t const * a)3948 int64x2_t test_vld1q_s64(int64_t const * a) {
3949 return vld1q_s64(a);
3950 }
3951
3952 // CHECK-LABEL: @test_vld1q_f16(
3953 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
3954 // CHECK: [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0i8(i8* [[TMP0]], i32 2)
3955 // CHECK: ret <8 x half> [[VLD1]]
test_vld1q_f16(float16_t const * a)3956 float16x8_t test_vld1q_f16(float16_t const * a) {
3957 return vld1q_f16(a);
3958 }
3959
3960 // CHECK-LABEL: @test_vld1q_f32(
3961 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
3962 // CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4)
3963 // CHECK: ret <4 x float> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1q_f32 (CHECK lines above); keep body unchanged.
test_vld1q_f32(float32_t const * a)3964 float32x4_t test_vld1q_f32(float32_t const * a) {
3965 return vld1q_f32(a);
3966 }
3967
3968 // CHECK-LABEL: @test_vld1q_p8(
3969 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
3970 // CHECK: ret <16 x i8> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1q_p8 (CHECK lines above); keep body unchanged.
test_vld1q_p8(poly8_t const * a)3971 poly8x16_t test_vld1q_p8(poly8_t const * a) {
3972 return vld1q_p8(a);
3973 }
3974
3975 // CHECK-LABEL: @test_vld1q_p16(
3976 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3977 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
3978 // CHECK: ret <8 x i16> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1q_p16 (CHECK lines above); keep body unchanged.
test_vld1q_p16(poly16_t const * a)3979 poly16x8_t test_vld1q_p16(poly16_t const * a) {
3980 return vld1q_p16(a);
3981 }
3982
3983 // CHECK-LABEL: @test_vld1_u8(
3984 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
3985 // CHECK: ret <8 x i8> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_u8 (CHECK lines above); keep body unchanged.
test_vld1_u8(uint8_t const * a)3986 uint8x8_t test_vld1_u8(uint8_t const * a) {
3987 return vld1_u8(a);
3988 }
3989
3990 // CHECK-LABEL: @test_vld1_u16(
3991 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3992 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
3993 // CHECK: ret <4 x i16> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_u16 (CHECK lines above); keep body unchanged.
test_vld1_u16(uint16_t const * a)3994 uint16x4_t test_vld1_u16(uint16_t const * a) {
3995 return vld1_u16(a);
3996 }
3997
3998 // CHECK-LABEL: @test_vld1_u32(
3999 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4000 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
4001 // CHECK: ret <2 x i32> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_u32 (CHECK lines above); keep body unchanged.
test_vld1_u32(uint32_t const * a)4002 uint32x2_t test_vld1_u32(uint32_t const * a) {
4003 return vld1_u32(a);
4004 }
4005
4006 // CHECK-LABEL: @test_vld1_u64(
4007 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4008 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
4009 // CHECK: ret <1 x i64> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_u64 (CHECK lines above); keep body unchanged.
test_vld1_u64(uint64_t const * a)4010 uint64x1_t test_vld1_u64(uint64_t const * a) {
4011 return vld1_u64(a);
4012 }
4013
4014 // CHECK-LABEL: @test_vld1_s8(
4015 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
4016 // CHECK: ret <8 x i8> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_s8 (CHECK lines above); keep body unchanged.
test_vld1_s8(int8_t const * a)4017 int8x8_t test_vld1_s8(int8_t const * a) {
4018 return vld1_s8(a);
4019 }
4020
4021 // CHECK-LABEL: @test_vld1_s16(
4022 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4023 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
4024 // CHECK: ret <4 x i16> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_s16 (CHECK lines above); keep body unchanged.
test_vld1_s16(int16_t const * a)4025 int16x4_t test_vld1_s16(int16_t const * a) {
4026 return vld1_s16(a);
4027 }
4028
4029 // CHECK-LABEL: @test_vld1_s32(
4030 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4031 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
4032 // CHECK: ret <2 x i32> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_s32 (CHECK lines above); keep body unchanged.
test_vld1_s32(int32_t const * a)4033 int32x2_t test_vld1_s32(int32_t const * a) {
4034 return vld1_s32(a);
4035 }
4036
4037 // CHECK-LABEL: @test_vld1_s64(
4038 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4039 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
4040 // CHECK: ret <1 x i64> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_s64 (CHECK lines above); keep body unchanged.
test_vld1_s64(int64_t const * a)4041 int64x1_t test_vld1_s64(int64_t const * a) {
4042 return vld1_s64(a);
4043 }
4044
4045 // CHECK-LABEL: @test_vld1_f16(
4046 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
4047 // CHECK: [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0i8(i8* [[TMP0]], i32 2)
4048 // CHECK: ret <4 x half> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_f16 (CHECK lines above); keep body unchanged.
test_vld1_f16(float16_t const * a)4049 float16x4_t test_vld1_f16(float16_t const * a) {
4050 return vld1_f16(a);
4051 }
4052
4053 // CHECK-LABEL: @test_vld1_f32(
4054 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
4055 // CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4)
4056 // CHECK: ret <2 x float> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_f32 (CHECK lines above); keep body unchanged.
test_vld1_f32(float32_t const * a)4057 float32x2_t test_vld1_f32(float32_t const * a) {
4058 return vld1_f32(a);
4059 }
4060
4061 // CHECK-LABEL: @test_vld1_p8(
4062 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
4063 // CHECK: ret <8 x i8> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_p8 (CHECK lines above); keep body unchanged.
test_vld1_p8(poly8_t const * a)4064 poly8x8_t test_vld1_p8(poly8_t const * a) {
4065 return vld1_p8(a);
4066 }
4067
4068 // CHECK-LABEL: @test_vld1_p16(
4069 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4070 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
4071 // CHECK: ret <4 x i16> [[VLD1]]
// Pins the @llvm.arm.neon.vld1 lowering of vld1_p16 (CHECK lines above); keep body unchanged.
test_vld1_p16(poly16_t const * a)4072 poly16x4_t test_vld1_p16(poly16_t const * a) {
4073 return vld1_p16(a);
4074 }
4075
4076 // CHECK-LABEL: @test_vld1q_dup_u8(
4077 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4078 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
4079 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
4080 // CHECK: ret <16 x i8> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_u8 (CHECK lines above); keep body unchanged.
test_vld1q_dup_u8(uint8_t const * a)4081 uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
4082 return vld1q_dup_u8(a);
4083 }
4084
4085 // CHECK-LABEL: @test_vld1q_dup_u16(
4086 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4087 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
4088 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
4089 // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
4090 // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
4091 // CHECK: ret <8 x i16> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_u16 (CHECK lines above); keep body unchanged.
test_vld1q_dup_u16(uint16_t const * a)4092 uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
4093 return vld1q_dup_u16(a);
4094 }
4095
4096 // CHECK-LABEL: @test_vld1q_dup_u32(
4097 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4098 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
4099 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
4100 // CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
4101 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
4102 // CHECK: ret <4 x i32> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_u32 (CHECK lines above); keep body unchanged.
test_vld1q_dup_u32(uint32_t const * a)4103 uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
4104 return vld1q_dup_u32(a);
4105 }
4106
4107 // CHECK-LABEL: @test_vld1q_dup_u64(
4108 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4109 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
4110 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
4111 // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
4112 // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
4113 // CHECK: ret <2 x i64> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_u64 (CHECK lines above); keep body unchanged.
test_vld1q_dup_u64(uint64_t const * a)4114 uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
4115 return vld1q_dup_u64(a);
4116 }
4117
4118 // CHECK-LABEL: @test_vld1q_dup_s8(
4119 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4120 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
4121 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
4122 // CHECK: ret <16 x i8> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_s8 (CHECK lines above); keep body unchanged.
test_vld1q_dup_s8(int8_t const * a)4123 int8x16_t test_vld1q_dup_s8(int8_t const * a) {
4124 return vld1q_dup_s8(a);
4125 }
4126
4127 // CHECK-LABEL: @test_vld1q_dup_s16(
4128 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4129 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
4130 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
4131 // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
4132 // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
4133 // CHECK: ret <8 x i16> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_s16 (CHECK lines above); keep body unchanged.
test_vld1q_dup_s16(int16_t const * a)4134 int16x8_t test_vld1q_dup_s16(int16_t const * a) {
4135 return vld1q_dup_s16(a);
4136 }
4137
4138 // CHECK-LABEL: @test_vld1q_dup_s32(
4139 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4140 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
4141 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
4142 // CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
4143 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
4144 // CHECK: ret <4 x i32> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_s32 (CHECK lines above); keep body unchanged.
test_vld1q_dup_s32(int32_t const * a)4145 int32x4_t test_vld1q_dup_s32(int32_t const * a) {
4146 return vld1q_dup_s32(a);
4147 }
4148
4149 // CHECK-LABEL: @test_vld1q_dup_s64(
4150 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4151 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
4152 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
4153 // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
4154 // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
4155 // CHECK: ret <2 x i64> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_s64 (CHECK lines above); keep body unchanged.
test_vld1q_dup_s64(int64_t const * a)4156 int64x2_t test_vld1q_dup_s64(int64_t const * a) {
4157 return vld1q_dup_s64(a);
4158 }
4159
4160 // CHECK-LABEL: @test_vld1q_dup_f16(
4161 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
4162 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
4163 // CHECK: [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
4164 // CHECK: [[TMP3:%.*]] = insertelement <8 x half> undef, half [[TMP2]], i32 0
4165 // CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
4166 // CHECK: ret <8 x half> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_f16 (CHECK lines above); keep body unchanged.
test_vld1q_dup_f16(float16_t const * a)4167 float16x8_t test_vld1q_dup_f16(float16_t const * a) {
4168 return vld1q_dup_f16(a);
4169 }
4170
4171 // CHECK-LABEL: @test_vld1q_dup_f32(
4172 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
4173 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
4174 // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
4175 // CHECK: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
4176 // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
4177 // CHECK: ret <4 x float> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_f32 (CHECK lines above); keep body unchanged.
test_vld1q_dup_f32(float32_t const * a)4178 float32x4_t test_vld1q_dup_f32(float32_t const * a) {
4179 return vld1q_dup_f32(a);
4180 }
4181
4182 // CHECK-LABEL: @test_vld1q_dup_p8(
4183 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4184 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
4185 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
4186 // CHECK: ret <16 x i8> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_p8 (CHECK lines above); keep body unchanged.
test_vld1q_dup_p8(poly8_t const * a)4187 poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
4188 return vld1q_dup_p8(a);
4189 }
4190
4191 // CHECK-LABEL: @test_vld1q_dup_p16(
4192 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4193 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
4194 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
4195 // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
4196 // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
4197 // CHECK: ret <8 x i16> [[LANE]]
// Pins the scalar-load + splat lowering of vld1q_dup_p16 (CHECK lines above); keep body unchanged.
test_vld1q_dup_p16(poly16_t const * a)4198 poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
4199 return vld1q_dup_p16(a);
4200 }
4201
4202 // CHECK-LABEL: @test_vld1_dup_u8(
4203 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4204 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
4205 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
4206 // CHECK: ret <8 x i8> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_u8 (CHECK lines above); keep body unchanged.
test_vld1_dup_u8(uint8_t const * a)4207 uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
4208 return vld1_dup_u8(a);
4209 }
4210
4211 // CHECK-LABEL: @test_vld1_dup_u16(
4212 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4213 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
4214 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
4215 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
4216 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
4217 // CHECK: ret <4 x i16> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_u16 (CHECK lines above); keep body unchanged.
test_vld1_dup_u16(uint16_t const * a)4218 uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
4219 return vld1_dup_u16(a);
4220 }
4221
4222 // CHECK-LABEL: @test_vld1_dup_u32(
4223 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4224 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
4225 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
4226 // CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
4227 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
4228 // CHECK: ret <2 x i32> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_u32 (CHECK lines above); keep body unchanged.
test_vld1_dup_u32(uint32_t const * a)4229 uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
4230 return vld1_dup_u32(a);
4231 }
4232
4233 // CHECK-LABEL: @test_vld1_dup_u64(
4234 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4235 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
4236 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
4237 // CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
4238 // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
4239 // CHECK: ret <1 x i64> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_u64 (CHECK lines above); keep body unchanged.
test_vld1_dup_u64(uint64_t const * a)4240 uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
4241 return vld1_dup_u64(a);
4242 }
4243
4244 // CHECK-LABEL: @test_vld1_dup_s8(
4245 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4246 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
4247 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
4248 // CHECK: ret <8 x i8> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_s8 (CHECK lines above); keep body unchanged.
test_vld1_dup_s8(int8_t const * a)4249 int8x8_t test_vld1_dup_s8(int8_t const * a) {
4250 return vld1_dup_s8(a);
4251 }
4252
4253 // CHECK-LABEL: @test_vld1_dup_s16(
4254 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4255 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
4256 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
4257 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
4258 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
4259 // CHECK: ret <4 x i16> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_s16 (CHECK lines above); keep body unchanged.
test_vld1_dup_s16(int16_t const * a)4260 int16x4_t test_vld1_dup_s16(int16_t const * a) {
4261 return vld1_dup_s16(a);
4262 }
4263
4264 // CHECK-LABEL: @test_vld1_dup_s32(
4265 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4266 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
4267 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
4268 // CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
4269 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
4270 // CHECK: ret <2 x i32> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_s32 (CHECK lines above); keep body unchanged.
test_vld1_dup_s32(int32_t const * a)4271 int32x2_t test_vld1_dup_s32(int32_t const * a) {
4272 return vld1_dup_s32(a);
4273 }
4274
4275 // CHECK-LABEL: @test_vld1_dup_s64(
4276 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4277 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
4278 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
4279 // CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
4280 // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
4281 // CHECK: ret <1 x i64> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_s64 (CHECK lines above); keep body unchanged.
test_vld1_dup_s64(int64_t const * a)4282 int64x1_t test_vld1_dup_s64(int64_t const * a) {
4283 return vld1_dup_s64(a);
4284 }
4285
4286 // CHECK-LABEL: @test_vld1_dup_f16(
4287 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
4288 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
4289 // CHECK: [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
4290 // CHECK: [[TMP3:%.*]] = insertelement <4 x half> undef, half [[TMP2]], i32 0
4291 // CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
4292 // CHECK: ret <4 x half> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_f16 (CHECK lines above); keep body unchanged.
test_vld1_dup_f16(float16_t const * a)4293 float16x4_t test_vld1_dup_f16(float16_t const * a) {
4294 return vld1_dup_f16(a);
4295 }
4296
4297 // CHECK-LABEL: @test_vld1_dup_f32(
4298 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
4299 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
4300 // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
4301 // CHECK: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
4302 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
4303 // CHECK: ret <2 x float> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_f32 (CHECK lines above); keep body unchanged.
test_vld1_dup_f32(float32_t const * a)4304 float32x2_t test_vld1_dup_f32(float32_t const * a) {
4305 return vld1_dup_f32(a);
4306 }
4307
4308 // CHECK-LABEL: @test_vld1_dup_p8(
4309 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4310 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
4311 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
4312 // CHECK: ret <8 x i8> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_p8 (CHECK lines above); keep body unchanged.
test_vld1_dup_p8(poly8_t const * a)4313 poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
4314 return vld1_dup_p8(a);
4315 }
4316
4317 // CHECK-LABEL: @test_vld1_dup_p16(
4318 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4319 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
4320 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
4321 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
4322 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
4323 // CHECK: ret <4 x i16> [[LANE]]
// Pins the scalar-load + splat lowering of vld1_dup_p16 (CHECK lines above); keep body unchanged.
test_vld1_dup_p16(poly16_t const * a)4324 poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
4325 return vld1_dup_p16(a);
4326 }
4327
4328 // CHECK-LABEL: @test_vld1q_lane_u8(
4329 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4330 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
4331 // CHECK: ret <16 x i8> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_u8 (CHECK lines above); keep body unchanged.
test_vld1q_lane_u8(uint8_t const * a,uint8x16_t b)4332 uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
4333 return vld1q_lane_u8(a, b, 15);
4334 }
4335
4336 // CHECK-LABEL: @test_vld1q_lane_u16(
4337 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4338 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4339 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4340 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
4341 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
4342 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
4343 // CHECK: ret <8 x i16> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_u16 (CHECK lines above); keep body unchanged.
test_vld1q_lane_u16(uint16_t const * a,uint16x8_t b)4344 uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
4345 return vld1q_lane_u16(a, b, 7);
4346 }
4347
4348 // CHECK-LABEL: @test_vld1q_lane_u32(
4349 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4350 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4351 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4352 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
4353 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
4354 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
4355 // CHECK: ret <4 x i32> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_u32 (CHECK lines above); keep body unchanged.
test_vld1q_lane_u32(uint32_t const * a,uint32x4_t b)4356 uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
4357 return vld1q_lane_u32(a, b, 3);
4358 }
4359
4360 // CHECK-LABEL: @test_vld1q_lane_u64(
4361 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4362 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4363 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4364 // CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
4365 // CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
4366 // CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
4367 // CHECK: ret <2 x i64> [[VLD1Q_LANE]]
// Pins the vld1 + shufflevector lane lowering of vld1q_lane_u64 (CHECK lines above); keep body unchanged.
test_vld1q_lane_u64(uint64_t const * a,uint64x2_t b)4368 uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
4369 return vld1q_lane_u64(a, b, 1);
4370 }
4371
4372 // CHECK-LABEL: @test_vld1q_lane_s8(
4373 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4374 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
4375 // CHECK: ret <16 x i8> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_s8 (CHECK lines above); keep body unchanged.
test_vld1q_lane_s8(int8_t const * a,int8x16_t b)4376 int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
4377 return vld1q_lane_s8(a, b, 15);
4378 }
4379
4380 // CHECK-LABEL: @test_vld1q_lane_s16(
4381 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4382 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4383 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4384 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
4385 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
4386 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
4387 // CHECK: ret <8 x i16> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_s16 (CHECK lines above); keep body unchanged.
test_vld1q_lane_s16(int16_t const * a,int16x8_t b)4388 int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
4389 return vld1q_lane_s16(a, b, 7);
4390 }
4391
4392 // CHECK-LABEL: @test_vld1q_lane_s32(
4393 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4394 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4395 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4396 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
4397 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
4398 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
4399 // CHECK: ret <4 x i32> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_s32 (CHECK lines above); keep body unchanged.
test_vld1q_lane_s32(int32_t const * a,int32x4_t b)4400 int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
4401 return vld1q_lane_s32(a, b, 3);
4402 }
4403
4404 // CHECK-LABEL: @test_vld1q_lane_s64(
4405 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4406 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4407 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4408 // CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
4409 // CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
4410 // CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
4411 // CHECK: ret <2 x i64> [[VLD1Q_LANE]]
// Pins the vld1 + shufflevector lane lowering of vld1q_lane_s64 (CHECK lines above); keep body unchanged.
test_vld1q_lane_s64(int64_t const * a,int64x2_t b)4412 int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
4413 return vld1q_lane_s64(a, b, 1);
4414 }
4415
4416 // CHECK-LABEL: @test_vld1q_lane_f16(
4417 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
4418 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
4419 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
4420 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
4421 // CHECK: [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
4422 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
4423 // CHECK: ret <8 x half> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_f16 (CHECK lines above); keep body unchanged.
test_vld1q_lane_f16(float16_t const * a,float16x8_t b)4424 float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
4425 return vld1q_lane_f16(a, b, 7);
4426 }
4427
4428 // CHECK-LABEL: @test_vld1q_lane_f32(
4429 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
4430 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4431 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
4432 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
4433 // CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
4434 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
4435 // CHECK: ret <4 x float> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_f32 (CHECK lines above); keep body unchanged.
test_vld1q_lane_f32(float32_t const * a,float32x4_t b)4436 float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
4437 return vld1q_lane_f32(a, b, 3);
4438 }
4439
4440 // CHECK-LABEL: @test_vld1q_lane_p8(
4441 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4442 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
4443 // CHECK: ret <16 x i8> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_p8 (CHECK lines above); keep body unchanged.
test_vld1q_lane_p8(poly8_t const * a,poly8x16_t b)4444 poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
4445 return vld1q_lane_p8(a, b, 15);
4446 }
4447
4448 // CHECK-LABEL: @test_vld1q_lane_p16(
4449 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4450 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4451 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4452 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
4453 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
4454 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
4455 // CHECK: ret <8 x i16> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1q_lane_p16 (CHECK lines above); keep body unchanged.
test_vld1q_lane_p16(poly16_t const * a,poly16x8_t b)4456 poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
4457 return vld1q_lane_p16(a, b, 7);
4458 }
4459
4460 // CHECK-LABEL: @test_vld1_lane_u8(
4461 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4462 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
4463 // CHECK: ret <8 x i8> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_u8 (CHECK lines above); keep body unchanged.
test_vld1_lane_u8(uint8_t const * a,uint8x8_t b)4464 uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
4465 return vld1_lane_u8(a, b, 7);
4466 }
4467
4468 // CHECK-LABEL: @test_vld1_lane_u16(
4469 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4470 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4471 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4472 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
4473 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
4474 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
4475 // CHECK: ret <4 x i16> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_u16 (CHECK lines above); keep body unchanged.
test_vld1_lane_u16(uint16_t const * a,uint16x4_t b)4476 uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
4477 return vld1_lane_u16(a, b, 3);
4478 }
4479
4480 // CHECK-LABEL: @test_vld1_lane_u32(
4481 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4482 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4483 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4484 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
4485 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
4486 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
4487 // CHECK: ret <2 x i32> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_u32 (CHECK lines above); keep body unchanged.
test_vld1_lane_u32(uint32_t const * a,uint32x2_t b)4488 uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
4489 return vld1_lane_u32(a, b, 1);
4490 }
4491
4492 // CHECK-LABEL: @test_vld1_lane_u64(
4493 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4494 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
4495 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
4496 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
4497 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
4498 // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
4499 // CHECK: ret <1 x i64> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_u64 (CHECK lines above); keep body unchanged.
test_vld1_lane_u64(uint64_t const * a,uint64x1_t b)4500 uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
4501 return vld1_lane_u64(a, b, 0);
4502 }
4503
4504 // CHECK-LABEL: @test_vld1_lane_s8(
4505 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4506 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
4507 // CHECK: ret <8 x i8> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_s8 (CHECK lines above); keep body unchanged.
test_vld1_lane_s8(int8_t const * a,int8x8_t b)4508 int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
4509 return vld1_lane_s8(a, b, 7);
4510 }
4511
4512 // CHECK-LABEL: @test_vld1_lane_s16(
4513 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4514 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4515 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4516 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
4517 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
4518 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
4519 // CHECK: ret <4 x i16> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_s16 (CHECK lines above); keep body unchanged.
test_vld1_lane_s16(int16_t const * a,int16x4_t b)4520 int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
4521 return vld1_lane_s16(a, b, 3);
4522 }
4523
4524 // CHECK-LABEL: @test_vld1_lane_s32(
4525 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
4526 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4527 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4528 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
4529 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
4530 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
4531 // CHECK: ret <2 x i32> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_s32 (CHECK lines above); keep body unchanged.
test_vld1_lane_s32(int32_t const * a,int32x2_t b)4532 int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
4533 return vld1_lane_s32(a, b, 1);
4534 }
4535
4536 // CHECK-LABEL: @test_vld1_lane_s64(
4537 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
4538 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
4539 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
4540 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
4541 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
4542 // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
4543 // CHECK: ret <1 x i64> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_s64 (CHECK lines above); keep body unchanged.
test_vld1_lane_s64(int64_t const * a,int64x1_t b)4544 int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
4545 return vld1_lane_s64(a, b, 0);
4546 }
4547
4548 // CHECK-LABEL: @test_vld1_lane_f16(
4549 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
4550 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
4551 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
4552 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
4553 // CHECK: [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
4554 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
4555 // CHECK: ret <4 x half> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_f16 (CHECK lines above); keep body unchanged.
test_vld1_lane_f16(float16_t const * a,float16x4_t b)4556 float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
4557 return vld1_lane_f16(a, b, 3);
4558 }
4559
4560 // CHECK-LABEL: @test_vld1_lane_f32(
4561 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
4562 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4563 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
4564 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
4565 // CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
4566 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
4567 // CHECK: ret <2 x float> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_f32 (CHECK lines above); keep body unchanged.
test_vld1_lane_f32(float32_t const * a,float32x2_t b)4568 float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
4569 return vld1_lane_f32(a, b, 1);
4570 }
4571
4572 // CHECK-LABEL: @test_vld1_lane_p8(
4573 // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
4574 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
4575 // CHECK: ret <8 x i8> [[VLD1_LANE]]
// Pins the scalar-load + insertelement lane lowering of vld1_lane_p8 (CHECK lines above); keep body unchanged.
test_vld1_lane_p8(poly8_t const * a,poly8x8_t b)4576 poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
4577 return vld1_lane_p8(a, b, 7);
4578 }
4579
4580 // CHECK-LABEL: @test_vld1_lane_p16(
4581 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4582 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4583 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4584 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
4585 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
4586 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
4587 // CHECK: ret <4 x i16> [[VLD1_LANE]]
// vld1_lane_p16 at lane 3: scalar i16 load (align 2) plus insertelement,
// with the usual <8 x i8> round-trip bitcasts on the vector operand.
test_vld1_lane_p16(poly16_t const * a,poly16x4_t b)4588 poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
4589   return vld1_lane_p16(a, b, 3);
4590 }
4591
// ---- vld2q_* (quad-register, two-element structure loads) ----
// Each test checks that the intrinsic allocates the sret aggregate
// (%struct.*x2_t, align 16) and calls the struct-returning
// llvm.arm.neon.vld2 intrinsic for the element type. Non-byte element
// types first bitcast the pointer argument to i8*. CHECK patterns match
// only the call prefix; the code below must stay exactly as generated.
4592 // CHECK-LABEL: @test_vld2q_u8(
4593 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
4594 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
4595 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
test_vld2q_u8(uint8_t const * a)4596 uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
4597   return vld2q_u8(a);
4598 }
4599
4600 // CHECK-LABEL: @test_vld2q_u16(
4601 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
4602 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
4603 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4604 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
test_vld2q_u16(uint16_t const * a)4605 uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
4606   return vld2q_u16(a);
4607 }
4608
4609 // CHECK-LABEL: @test_vld2q_u32(
4610 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
4611 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
4612 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4613 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
test_vld2q_u32(uint32_t const * a)4614 uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
4615   return vld2q_u32(a);
4616 }
4617
4618 // CHECK-LABEL: @test_vld2q_s8(
4619 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
4620 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
4621 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
test_vld2q_s8(int8_t const * a)4622 int8x16x2_t test_vld2q_s8(int8_t const * a) {
4623   return vld2q_s8(a);
4624 }
4625
4626 // CHECK-LABEL: @test_vld2q_s16(
4627 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
4628 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
4629 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4630 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
test_vld2q_s16(int16_t const * a)4631 int16x8x2_t test_vld2q_s16(int16_t const * a) {
4632   return vld2q_s16(a);
4633 }
4634
4635 // CHECK-LABEL: @test_vld2q_s32(
4636 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
4637 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
4638 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4639 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
test_vld2q_s32(int32_t const * a)4640 int32x4x2_t test_vld2q_s32(int32_t const * a) {
4641   return vld2q_s32(a);
4642 }
4643
4644 // CHECK-LABEL: @test_vld2q_f16(
4645 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
4646 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
4647 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
4648 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half>
test_vld2q_f16(float16_t const * a)4649 float16x8x2_t test_vld2q_f16(float16_t const * a) {
4650   return vld2q_f16(a);
4651 }
4652
4653 // CHECK-LABEL: @test_vld2q_f32(
4654 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
4655 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
4656 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
4657 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float>
test_vld2q_f32(float32_t const * a)4658 float32x4x2_t test_vld2q_f32(float32_t const * a) {
4659   return vld2q_f32(a);
4660 }
4661
4662 // CHECK-LABEL: @test_vld2q_p8(
4663 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
4664 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
4665 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
test_vld2q_p8(poly8_t const * a)4666 poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
4667   return vld2q_p8(a);
4668 }
4669
4670 // CHECK-LABEL: @test_vld2q_p16(
4671 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
4672 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
4673 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4674 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
test_vld2q_p16(poly16_t const * a)4675 poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
4676   return vld2q_p16(a);
4677 }
4678
// ---- vld2_* (double-register, two-element structure loads) ----
// Same pattern as the quad-register tests but with 64-bit D-register
// vector types: the sret aggregate is align 8 and the intrinsic returns
// pairs of 64-bit vectors (e.g. { <8 x i8>, <8 x i8> }). The 64-bit
// element variants (u64/s64) exist only in this non-q form here.
4679 // CHECK-LABEL: @test_vld2_u8(
4680 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
4681 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
4682 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
test_vld2_u8(uint8_t const * a)4683 uint8x8x2_t test_vld2_u8(uint8_t const * a) {
4684   return vld2_u8(a);
4685 }
4686
4687 // CHECK-LABEL: @test_vld2_u16(
4688 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
4689 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
4690 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4691 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
test_vld2_u16(uint16_t const * a)4692 uint16x4x2_t test_vld2_u16(uint16_t const * a) {
4693   return vld2_u16(a);
4694 }
4695
4696 // CHECK-LABEL: @test_vld2_u32(
4697 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
4698 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
4699 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4700 // CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
test_vld2_u32(uint32_t const * a)4701 uint32x2x2_t test_vld2_u32(uint32_t const * a) {
4702   return vld2_u32(a);
4703 }
4704
4705 // CHECK-LABEL: @test_vld2_u64(
4706 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
4707 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
4708 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
4709 // CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
test_vld2_u64(uint64_t const * a)4710 uint64x1x2_t test_vld2_u64(uint64_t const * a) {
4711   return vld2_u64(a);
4712 }
4713
4714 // CHECK-LABEL: @test_vld2_s8(
4715 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
4716 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
4717 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
test_vld2_s8(int8_t const * a)4718 int8x8x2_t test_vld2_s8(int8_t const * a) {
4719   return vld2_s8(a);
4720 }
4721
4722 // CHECK-LABEL: @test_vld2_s16(
4723 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
4724 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
4725 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4726 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
test_vld2_s16(int16_t const * a)4727 int16x4x2_t test_vld2_s16(int16_t const * a) {
4728   return vld2_s16(a);
4729 }
4730
4731 // CHECK-LABEL: @test_vld2_s32(
4732 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
4733 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
4734 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4735 // CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
test_vld2_s32(int32_t const * a)4736 int32x2x2_t test_vld2_s32(int32_t const * a) {
4737   return vld2_s32(a);
4738 }
4739
4740 // CHECK-LABEL: @test_vld2_s64(
4741 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
4742 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
4743 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
4744 // CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
test_vld2_s64(int64_t const * a)4745 int64x1x2_t test_vld2_s64(int64_t const * a) {
4746   return vld2_s64(a);
4747 }
4748
4749 // CHECK-LABEL: @test_vld2_f16(
4750 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
4751 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
4752 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
4753 // CHECK: [[VLD2_V:%.*]] = call { <4 x half>, <4 x half>
test_vld2_f16(float16_t const * a)4754 float16x4x2_t test_vld2_f16(float16_t const * a) {
4755   return vld2_f16(a);
4756 }
4757
4758 // CHECK-LABEL: @test_vld2_f32(
4759 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
4760 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
4761 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
4762 // CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float>
test_vld2_f32(float32_t const * a)4763 float32x2x2_t test_vld2_f32(float32_t const * a) {
4764   return vld2_f32(a);
4765 }
4766
4767 // CHECK-LABEL: @test_vld2_p8(
4768 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
4769 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
4770 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
test_vld2_p8(poly8_t const * a)4771 poly8x8x2_t test_vld2_p8(poly8_t const * a) {
4772   return vld2_p8(a);
4773 }
4774
4775 // CHECK-LABEL: @test_vld2_p16(
4776 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
4777 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
4778 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4779 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
test_vld2_p16(poly16_t const * a)4780 poly16x4x2_t test_vld2_p16(poly16_t const * a) {
4781   return vld2_p16(a);
4782 }
4783
// ---- vld2q_lane_* (quad-register two-element lane loads) ----
// These take the existing 2-vector aggregate by value. Per the CHECK
// lines: the aggregate arrives coerced as [4 x i64], is stored and then
// memcpy'd (32 bytes, align 16) into a local copy; each vector field is
// loaded via getelementptr, bitcast to <16 x i8> and back, and finally
// the struct-returning lane-load intrinsic is called. Patterns only
// match through the call prefix. Code must remain byte-identical to the
// generated form or the CHECK variables will not bind.
4784 // CHECK-LABEL: @test_vld2q_lane_u16(
4785 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
4786 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
4787 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
4788 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
4789 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
4790 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4791 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
4792 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
4793 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4794 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
4795 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
4796 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
4797 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
4798 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
4799 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
4800 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
4801 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
4802 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
4803 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
4804 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
4805 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
4806 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
test_vld2q_lane_u16(uint16_t const * a,uint16x8x2_t b)4807 uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
4808   return vld2q_lane_u16(a, b, 7);
4809 }
4810
4811 // CHECK-LABEL: @test_vld2q_lane_u32(
4812 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
4813 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
4814 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
4815 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
4816 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
4817 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4818 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
4819 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
4820 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4821 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
4822 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
4823 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
4824 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
4825 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
4826 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
4827 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
4828 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
4829 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
4830 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
4831 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
4832 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
4833 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
test_vld2q_lane_u32(uint32_t const * a,uint32x4x2_t b)4834 uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
4835   return vld2q_lane_u32(a, b, 3);
4836 }
4837
4838 // CHECK-LABEL: @test_vld2q_lane_s16(
4839 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
4840 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
4841 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
4842 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
4843 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
4844 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4845 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
4846 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
4847 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4848 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
4849 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
4850 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
4851 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
4852 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
4853 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
4854 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
4855 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
4856 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
4857 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
4858 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
4859 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
4860 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
test_vld2q_lane_s16(int16_t const * a,int16x8x2_t b)4861 int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
4862   return vld2q_lane_s16(a, b, 7);
4863 }
4864
4865 // CHECK-LABEL: @test_vld2q_lane_s32(
4866 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
4867 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
4868 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
4869 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
4870 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
4871 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4872 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
4873 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
4874 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4875 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
4876 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
4877 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
4878 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
4879 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
4880 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
4881 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
4882 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
4883 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
4884 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
4885 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
4886 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
4887 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
test_vld2q_lane_s32(int32_t const * a,int32x4x2_t b)4888 int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
4889   return vld2q_lane_s32(a, b, 3);
4890 }
4891
4892 // CHECK-LABEL: @test_vld2q_lane_f16(
4893 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
4894 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
4895 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
4896 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
4897 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
4898 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4899 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
4900 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
4901 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4902 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
4903 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
4904 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
4905 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
4906 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
4907 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
4908 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
4909 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
4910 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
4911 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
4912 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
4913 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
4914 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>
test_vld2q_lane_f16(float16_t const * a,float16x8x2_t b)4915 float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
4916   return vld2q_lane_f16(a, b, 7);
4917 }
4918
4919 // CHECK-LABEL: @test_vld2q_lane_f32(
4920 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
4921 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
4922 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
4923 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
4924 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
4925 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4926 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
4927 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
4928 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4929 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
4930 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
4931 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
4932 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
4933 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
4934 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
4935 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
4936 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
4937 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
4938 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
4939 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
4940 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
4941 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>
test_vld2q_lane_f32(float32_t const * a,float32x4x2_t b)4942 float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
4943   return vld2q_lane_f32(a, b, 3);
4944 }
4945
4946 // CHECK-LABEL: @test_vld2q_lane_p16(
4947 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
4948 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
4949 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
4950 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
4951 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
4952 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4953 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
4954 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
4955 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4956 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
4957 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
4958 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
4959 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
4960 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
4961 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
4962 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
4963 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
4964 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
4965 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
4966 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
4967 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
4968 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
test_vld2q_lane_p16(poly16_t const * a,poly16x8x2_t b)4969 poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
4970   return vld2q_lane_p16(a, b, 7);
4971 }
4972
4973 // CHECK-LABEL: @test_vld2_lane_u8(
4974 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
4975 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
4976 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
4977 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
4978 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
4979 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
4980 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
4981 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
4982 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
4983 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
4984 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
4985 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
4986 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
4987 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
4988 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
4989 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
4990 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
test_vld2_lane_u8(uint8_t const * a,uint8x8x2_t b)4991 uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
4992 return vld2_lane_u8(a, b, 7);
4993 }
4994
4995 // CHECK-LABEL: @test_vld2_lane_u16(
4996 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
4997 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
4998 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
4999 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
5000 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5001 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5002 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
5003 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
5004 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5005 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
5006 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5007 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
5008 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5009 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5010 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5011 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
5012 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5013 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5014 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5015 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5016 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5017 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
test_vld2_lane_u16(uint16_t const * a,uint16x4x2_t b)5018 uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
5019 return vld2_lane_u16(a, b, 3);
5020 }
5021
5022 // CHECK-LABEL: @test_vld2_lane_u32(
5023 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
5024 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
5025 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
5026 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
5027 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
5028 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5029 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
5030 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
5031 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5032 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
5033 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5034 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
5035 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
5036 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5037 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5038 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
5039 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5040 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5041 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5042 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5043 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5044 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
test_vld2_lane_u32(uint32_t const * a,uint32x2x2_t b)5045 uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
5046 return vld2_lane_u32(a, b, 1);
5047 }
5048
5049 // CHECK-LABEL: @test_vld2_lane_s8(
5050 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
5051 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
5052 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
5053 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
5054 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
5055 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5056 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
5057 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
5058 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5059 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5060 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
5061 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
5062 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5063 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
5064 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5065 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5066 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
test_vld2_lane_s8(int8_t const * a,int8x8x2_t b)5067 int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
5068 return vld2_lane_s8(a, b, 7);
5069 }
5070
5071 // CHECK-LABEL: @test_vld2_lane_s16(
5072 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
5073 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
5074 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
5075 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
5076 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5077 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5078 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
5079 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
5080 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5081 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5082 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5083 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
5084 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5085 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5086 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5087 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
5088 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5089 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5090 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5091 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5092 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5093 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
// IR test: vld2_lane_s16 with lane index 3 (last lane of a 4-element vector);
// expected codegen is pinned by the CHECK lines above.
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
  return vld2_lane_s16(a, b, 3);
}
5097
5098 // CHECK-LABEL: @test_vld2_lane_s32(
5099 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
5100 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
5101 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
5102 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
5103 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
5104 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5105 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
5106 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
5107 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5108 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5109 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5110 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
5111 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
5112 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5113 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5114 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
5115 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5116 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5117 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5118 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5119 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5120 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
// IR test: vld2_lane_s32 with lane index 1 (last lane of a 2-element vector);
// expected codegen is pinned by the CHECK lines above.
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
  return vld2_lane_s32(a, b, 1);
}
5124
5125 // CHECK-LABEL: @test_vld2_lane_f16(
5126 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
5127 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
5128 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
5129 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
5130 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
5131 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5132 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
5133 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
5134 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5135 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5136 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5137 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
5138 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
5139 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
5140 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
5141 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
5142 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
5143 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
5144 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
5145 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
5146 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
5147 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half>
// IR test: vld2_lane_f16 with lane index 3; exercises the half-precision
// (+fullfp16) path. Expected codegen is pinned by the CHECK lines above.
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
  return vld2_lane_f16(a, b, 3);
}
5151
5152 // CHECK-LABEL: @test_vld2_lane_f32(
5153 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
5154 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
5155 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
5156 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
5157 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
5158 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5159 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
5160 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
5161 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5162 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5163 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5164 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
5165 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
5166 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
5167 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
5168 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
5169 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
5170 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
5171 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
5172 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
5173 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
5174 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float>
// IR test: vld2_lane_f32 with lane index 1; expected codegen is pinned by the
// CHECK lines above.
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
  return vld2_lane_f32(a, b, 1);
}
5178
5179 // CHECK-LABEL: @test_vld2_lane_p8(
5180 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
5181 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
5182 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
5183 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
5184 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
5185 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5186 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
5187 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
5188 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5189 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5190 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
5191 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
5192 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5193 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
5194 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5195 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5196 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
// IR test: vld2_lane_p8 (polynomial element type) with lane index 7; expected
// codegen is pinned by the CHECK lines above.
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
  return vld2_lane_p8(a, b, 7);
}
5200
5201 // CHECK-LABEL: @test_vld2_lane_p16(
5202 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
5203 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
5204 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
5205 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
5206 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5207 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5208 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
5209 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
5210 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5211 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5212 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5213 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
5214 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5215 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5216 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5217 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
5218 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5219 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5220 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5221 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5222 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5223 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
// IR test: vld2_lane_p16 (polynomial element type) with lane index 3; expected
// codegen is pinned by the CHECK lines above.
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
  return vld2_lane_p16(a, b, 3);
}
5227
5228 // CHECK-LABEL: @test_vld3q_u8(
5229 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
5230 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
5231 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
// IR test: quad-register 3-element structure load vld3q_u8; expected codegen
// is pinned by the CHECK lines above.
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
  return vld3q_u8(a);
}
5235
5236 // CHECK-LABEL: @test_vld3q_u16(
5237 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
5238 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
5239 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5240 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// IR test: vld3q_u16; expected codegen is pinned by the CHECK lines above.
uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
  return vld3q_u16(a);
}
5244
5245 // CHECK-LABEL: @test_vld3q_u32(
5246 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
5247 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
5248 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5249 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// IR test: vld3q_u32; expected codegen is pinned by the CHECK lines above.
uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
  return vld3q_u32(a);
}
5253
5254 // CHECK-LABEL: @test_vld3q_s8(
5255 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
5256 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
5257 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
// IR test: vld3q_s8; expected codegen is pinned by the CHECK lines above.
int8x16x3_t test_vld3q_s8(int8_t const * a) {
  return vld3q_s8(a);
}
5261
5262 // CHECK-LABEL: @test_vld3q_s16(
5263 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
5264 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
5265 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5266 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// IR test: vld3q_s16; expected codegen is pinned by the CHECK lines above.
int16x8x3_t test_vld3q_s16(int16_t const * a) {
  return vld3q_s16(a);
}
5270
5271 // CHECK-LABEL: @test_vld3q_s32(
5272 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
5273 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
5274 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5275 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// IR test: vld3q_s32; expected codegen is pinned by the CHECK lines above.
int32x4x3_t test_vld3q_s32(int32_t const * a) {
  return vld3q_s32(a);
}
5279
5280 // CHECK-LABEL: @test_vld3q_f16(
5281 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
5282 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
5283 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5284 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
// IR test: vld3q_f16 (half-precision, +fullfp16 path); expected codegen is
// pinned by the CHECK lines above.
float16x8x3_t test_vld3q_f16(float16_t const * a) {
  return vld3q_f16(a);
}
5288
5289 // CHECK-LABEL: @test_vld3q_f32(
5290 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
5291 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
5292 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
5293 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
// IR test: vld3q_f32; expected codegen is pinned by the CHECK lines above.
float32x4x3_t test_vld3q_f32(float32_t const * a) {
  return vld3q_f32(a);
}
5297
5298 // CHECK-LABEL: @test_vld3q_p8(
5299 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
5300 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
5301 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
// IR test: vld3q_p8 (polynomial element type); expected codegen is pinned by
// the CHECK lines above.
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
  return vld3q_p8(a);
}
5305
5306 // CHECK-LABEL: @test_vld3q_p16(
5307 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
5308 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
5309 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5310 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// IR test: vld3q_p16 (polynomial element type); expected codegen is pinned by
// the CHECK lines above.
poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
  return vld3q_p16(a);
}
5314
5315 // CHECK-LABEL: @test_vld3_u8(
5316 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
5317 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
5318 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// IR test: double-register 3-element structure load vld3_u8; expected codegen
// is pinned by the CHECK lines above.
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
  return vld3_u8(a);
}
5322
5323 // CHECK-LABEL: @test_vld3_u16(
5324 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
5325 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
5326 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5327 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// IR test: vld3_u16; expected codegen is pinned by the CHECK lines above.
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
  return vld3_u16(a);
}
5331
5332 // CHECK-LABEL: @test_vld3_u32(
5333 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
5334 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
5335 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5336 // CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
// IR test: vld3_u32; expected codegen is pinned by the CHECK lines above.
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
  return vld3_u32(a);
}
5340
5341 // CHECK-LABEL: @test_vld3_u64(
5342 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
5343 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
5344 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5345 // CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
// IR test: vld3_u64 (single-lane <1 x i64> vectors); expected codegen is
// pinned by the CHECK lines above.
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
  return vld3_u64(a);
}
5349
5350 // CHECK-LABEL: @test_vld3_s8(
5351 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
5352 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
5353 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// IR test: vld3_s8; expected codegen is pinned by the CHECK lines above.
int8x8x3_t test_vld3_s8(int8_t const * a) {
  return vld3_s8(a);
}
5357
5358 // CHECK-LABEL: @test_vld3_s16(
5359 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
5360 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
5361 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5362 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// IR test: vld3_s16; expected codegen is pinned by the CHECK lines above.
int16x4x3_t test_vld3_s16(int16_t const * a) {
  return vld3_s16(a);
}
5366
5367 // CHECK-LABEL: @test_vld3_s32(
5368 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
5369 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
5370 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5371 // CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
// IR test: vld3_s32; expected codegen is pinned by the CHECK lines above.
int32x2x3_t test_vld3_s32(int32_t const * a) {
  return vld3_s32(a);
}
5375
5376 // CHECK-LABEL: @test_vld3_s64(
5377 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
5378 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
5379 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5380 // CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
// IR test: vld3_s64 (single-lane <1 x i64> vectors); expected codegen is
// pinned by the CHECK lines above.
int64x1x3_t test_vld3_s64(int64_t const * a) {
  return vld3_s64(a);
}
5384
5385 // CHECK-LABEL: @test_vld3_f16(
5386 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
5387 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
5388 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5389 // CHECK: [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
// IR test: vld3_f16 (half-precision, +fullfp16 path); expected codegen is
// pinned by the CHECK lines above.
float16x4x3_t test_vld3_f16(float16_t const * a) {
  return vld3_f16(a);
}
5393
5394 // CHECK-LABEL: @test_vld3_f32(
5395 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
5396 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
5397 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
5398 // CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
// IR test: vld3_f32; expected codegen is pinned by the CHECK lines above.
float32x2x3_t test_vld3_f32(float32_t const * a) {
  return vld3_f32(a);
}
5402
5403 // CHECK-LABEL: @test_vld3_p8(
5404 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
5405 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
5406 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// IR test: vld3_p8 (polynomial element type); expected codegen is pinned by
// the CHECK lines above.
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
  return vld3_p8(a);
}
5410
5411 // CHECK-LABEL: @test_vld3_p16(
5412 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
5413 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
5414 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5415 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// IR test: vld3_p16 (polynomial element type); expected codegen is pinned by
// the CHECK lines above.
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
  return vld3_p16(a);
}
5419
5420 // CHECK-LABEL: @test_vld3q_lane_u16(
5421 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
5422 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
5423 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
5424 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
5425 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5426 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5427 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
5428 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
5429 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5430 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
5431 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5432 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
5433 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5434 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5435 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5436 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
5437 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5438 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5439 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5440 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
5441 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5442 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5443 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5444 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5445 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5446 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5447 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// IR test: vld3q_lane_u16 with lane index 7 (last lane of an 8-element
// vector); expected codegen is pinned by the CHECK lines above.
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}
5451
5452 // CHECK-LABEL: @test_vld3q_lane_u32(
5453 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
5454 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
5455 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
5456 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
5457 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
5458 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5459 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
5460 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
5461 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5462 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
5463 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5464 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
5465 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
5466 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
5467 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5468 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
5469 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
5470 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
5471 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5472 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
5473 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
5474 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
5475 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
5476 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5477 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5478 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5479 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// IR test: vld3q_lane_u32 with lane index 3 (last lane of a 4-element
// vector); expected codegen is pinned by the CHECK lines above.
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
  return vld3q_lane_u32(a, b, 3);
}
5483
5484 // CHECK-LABEL: @test_vld3q_lane_s16(
5485 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
5486 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
5487 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
5488 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
5489 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5490 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5491 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
5492 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
5493 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5494 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
5495 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5496 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5497 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5498 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5499 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5500 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5501 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5502 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5503 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5504 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5505 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5506 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5507 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5508 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5509 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5510 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5511 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// IR test: vld3q_lane_s16 with lane index 7; expected codegen is pinned by
// the CHECK lines above.
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
  return vld3q_lane_s16(a, b, 7);
}
5515
5516 // CHECK-LABEL: @test_vld3q_lane_s32(
5517 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
5518 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
5519 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
5520 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
5521 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
5522 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5523 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
5524 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
5525 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5526 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
5527 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5528 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5529 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
5530 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
5531 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5532 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5533 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
5534 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
5535 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5536 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5537 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
5538 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
5539 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
5540 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5541 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5542 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5543 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// IR test: vld3q_lane_s32 with lane index 3; expected codegen is pinned by
// the CHECK lines above.
int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
  return vld3q_lane_s32(a, b, 3);
}
5547
5548 // CHECK-LABEL: @test_vld3q_lane_f16(
5549 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
5550 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
5551 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
5552 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
5553 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
5554 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5555 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
5556 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
5557 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5558 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
5559 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5560 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5561 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
5562 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
5563 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
5564 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5565 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
5566 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
5567 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
5568 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5569 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
5570 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
5571 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
5572 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
5573 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
5574 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
5575 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
  // Lane 7 is the highest valid index for an <8 x half> q-register; the CHECK
  // lines above pin the generated vld3lane IR for this call.
  return vld3q_lane_f16(a, b, 7);
}
5579
5580 // CHECK-LABEL: @test_vld3q_lane_f32(
5581 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
5582 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
5583 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
5584 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
5585 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
5586 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5587 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
5588 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
5589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5590 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
5591 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5592 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5593 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
5594 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
5595 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
5596 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5597 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
5598 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
5599 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
5600 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5601 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
5602 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
5603 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
5604 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
5605 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
5606 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
5607 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
  // Lane 3 is the highest valid index for a <4 x float> q-register; the CHECK
  // lines above pin the generated vld3lane IR for this call.
  return vld3q_lane_f32(a, b, 3);
}
5611
5612 // CHECK-LABEL: @test_vld3q_lane_p16(
5613 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
5614 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
5615 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
5616 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
5617 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5618 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5619 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
5620 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
5621 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5622 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
5623 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5624 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5625 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5626 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5627 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5628 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5629 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5630 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5631 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5632 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5633 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5634 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5635 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5636 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5637 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5638 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5639 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
  // Lane 7 is the highest valid index for an <8 x i16> q-register; poly16
  // shares the <8 x i16> IR representation per the CHECK lines above.
  return vld3q_lane_p16(a, b, 7);
}
5643
5644 // CHECK-LABEL: @test_vld3_lane_u8(
5645 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
5646 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
5647 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
5648 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
5649 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
5650 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5651 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
5652 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
5653 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5654 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
5655 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
5656 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
5657 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5658 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
5659 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5660 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5661 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
5662 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
5663 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
5664 // CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
  // Lane 7 is the highest valid index for an <8 x i8> d-register; note no
  // element bitcasts appear in the CHECK lines since i8 vectors need none.
  return vld3_lane_u8(a, b, 7);
}
5668
5669 // CHECK-LABEL: @test_vld3_lane_u16(
5670 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
5671 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
5672 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
5673 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
5674 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
5675 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5676 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
5677 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
5678 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5679 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
5680 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5681 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
5682 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
5683 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5684 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5685 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
5686 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5687 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5688 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5689 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
5690 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
5691 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
5692 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
5693 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5694 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5695 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
5696 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
  // Lane 3 is the highest valid index for a <4 x i16> d-register; the CHECK
  // lines above pin the generated vld3lane IR for this call.
  return vld3_lane_u16(a, b, 3);
}
5700
5701 // CHECK-LABEL: @test_vld3_lane_u32(
5702 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
5703 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
5704 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
5705 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
5706 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
5707 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5708 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
5709 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
5710 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5711 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
5712 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5713 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
5714 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
5715 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5716 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5717 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
5718 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5719 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5720 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5721 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
5722 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
5723 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
5724 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
5725 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5726 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5727 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
5728 // CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
  // Lane 1 is the highest valid index for a <2 x i32> d-register; the CHECK
  // lines above pin the generated vld3lane IR for this call.
  return vld3_lane_u32(a, b, 1);
}
5732
5733 // CHECK-LABEL: @test_vld3_lane_s8(
5734 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
5735 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
5736 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
5737 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
5738 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
5739 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5740 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
5741 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
5742 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5743 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
5744 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
5745 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
5746 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5747 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
5748 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5749 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5750 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
5751 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
5752 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
5753 // CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
  // Signed variant of the u8 test above: same <8 x i8> IR shape, lane 7.
  return vld3_lane_s8(a, b, 7);
}
5757
5758 // CHECK-LABEL: @test_vld3_lane_s16(
5759 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
5760 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
5761 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
5762 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
5763 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
5764 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5765 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
5766 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
5767 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5768 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
5769 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5770 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
5771 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
5772 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5773 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5774 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
5775 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5776 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5777 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5778 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
5779 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
5780 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
5781 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
5782 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5783 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5784 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
5785 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
  // Signed variant of the u16 test above: same <4 x i16> IR shape, lane 3.
  return vld3_lane_s16(a, b, 3);
}
5789
5790 // CHECK-LABEL: @test_vld3_lane_s32(
5791 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
5792 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
5793 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
5794 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
5795 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
5796 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5797 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
5798 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
5799 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5800 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
5801 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5802 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
5803 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
5804 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5805 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5806 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
5807 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5808 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5809 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5810 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
5811 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
5812 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
5813 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
5814 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5815 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5816 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
5817 // CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  // Signed variant of the u32 test above: same <2 x i32> IR shape, lane 1.
  return vld3_lane_s32(a, b, 1);
}
5821
5822 // CHECK-LABEL: @test_vld3_lane_f16(
5823 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
5824 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
5825 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
5826 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
5827 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
5828 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5829 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
5830 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
5831 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5832 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
5833 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5834 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
5835 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
5836 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
5837 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
5838 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
5839 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
5840 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
5841 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
5842 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
5843 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
5844 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
5845 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
5846 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
5847 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
5848 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
5849 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  // Lane 3 is the highest valid index for a <4 x half> d-register; the CHECK
  // lines above pin the generated vld3lane IR for this call.
  return vld3_lane_f16(a, b, 3);
}
5853
5854 // CHECK-LABEL: @test_vld3_lane_f32(
5855 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
5856 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
5857 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
5858 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
5859 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
5860 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5861 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
5862 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
5863 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5864 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
5865 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5866 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
5867 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
5868 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
5869 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
5870 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
5871 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
5872 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
5873 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
5874 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
5875 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
5876 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
5877 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
5878 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
5879 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
5880 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
5881 // CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  // Lane 1 is the highest valid index for a <2 x float> d-register; the CHECK
  // lines above pin the generated vld3lane IR for this call.
  return vld3_lane_f32(a, b, 1);
}
5885
5886 // CHECK-LABEL: @test_vld3_lane_p8(
5887 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
5888 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
5889 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
5890 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
5891 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
5892 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5893 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
5894 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
5895 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5896 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
5897 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
5898 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
5899 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5900 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
5901 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5902 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5903 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
5904 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
5905 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
5906 // CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
  // poly8 shares the <8 x i8> IR representation with u8/s8 per the CHECK
  // lines above; lane 7 is the highest valid index.
  return vld3_lane_p8(a, b, 7);
}
5910
5911 // CHECK-LABEL: @test_vld3_lane_p16(
5912 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
5913 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
5914 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
5915 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
5916 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
5917 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5918 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
5919 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
5920 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5921 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
5922 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5923 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
5924 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
5925 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5926 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5927 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
5928 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5929 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5930 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5931 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
5932 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
5933 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
5934 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
5935 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5936 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5937 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
5938 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
  // poly16 shares the <4 x i16> IR representation with u16/s16 per the CHECK
  // lines above; lane 3 is the highest valid index.
  return vld3_lane_p16(a, b, 3);
}
5942
5943 // CHECK-LABEL: @test_vld4q_u8(
5944 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
5945 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
5946 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  // Whole-structure 4-way load of <16 x i8> q-registers; no pointer bitcast
  // line is checked since the element type is already i8.
  return vld4q_u8(a);
}
5950
5951 // CHECK-LABEL: @test_vld4q_u16(
5952 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
5953 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
5954 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5955 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  // Whole-structure 4-way load of <8 x i16> q-registers; the CHECK lines
  // verify the i16* -> i8* pointer bitcast and the vld4 call.
  return vld4q_u16(a);
}
5959
5960 // CHECK-LABEL: @test_vld4q_u32(
5961 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
5962 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
5963 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5964 // CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  // Whole-structure 4-way load of <4 x i32> q-registers; the CHECK lines
  // verify the i32* -> i8* pointer bitcast and the vld4 call.
  return vld4q_u32(a);
}
5968
5969 // CHECK-LABEL: @test_vld4q_s8(
5970 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
5971 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
5972 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  // Signed variant of test_vld4q_u8: identical <16 x i8> IR shape.
  return vld4q_s8(a);
}
5976
5977 // CHECK-LABEL: @test_vld4q_s16(
5978 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
5979 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
5980 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5981 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  // Signed variant of test_vld4q_u16: identical <8 x i16> IR shape.
  return vld4q_s16(a);
}
5985
5986 // CHECK-LABEL: @test_vld4q_s32(
5987 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
5988 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
5989 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5990 // CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
test_vld4q_s32(int32_t const * a)5991 int32x4x4_t test_vld4q_s32(int32_t const * a) {
5992 return vld4q_s32(a);
5993 }
5994
5995 // CHECK-LABEL: @test_vld4q_f16(
5996 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
5997 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
5998 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5999 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
test_vld4q_f16(float16_t const * a)6000 float16x8x4_t test_vld4q_f16(float16_t const * a) {
6001 return vld4q_f16(a);
6002 }
6003
6004 // CHECK-LABEL: @test_vld4q_f32(
6005 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
6006 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
6007 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
6008 // CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
test_vld4q_f32(float32_t const * a)6009 float32x4x4_t test_vld4q_f32(float32_t const * a) {
6010 return vld4q_f32(a);
6011 }
6012
6013 // CHECK-LABEL: @test_vld4q_p8(
6014 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
6015 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
6016 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
test_vld4q_p8(poly8_t const * a)6017 poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
6018 return vld4q_p8(a);
6019 }
6020
6021 // CHECK-LABEL: @test_vld4q_p16(
6022 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
6023 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
6024 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6025 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
test_vld4q_p16(poly16_t const * a)6026 poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
6027 return vld4q_p16(a);
6028 }
6029
6030 // CHECK-LABEL: @test_vld4_u8(
6031 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
6032 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
6033 // CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
test_vld4_u8(uint8_t const * a)6034 uint8x8x4_t test_vld4_u8(uint8_t const * a) {
6035 return vld4_u8(a);
6036 }
6037
6038 // CHECK-LABEL: @test_vld4_u16(
6039 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
6040 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
6041 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6042 // CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
test_vld4_u16(uint16_t const * a)6043 uint16x4x4_t test_vld4_u16(uint16_t const * a) {
6044 return vld4_u16(a);
6045 }
6046
6047 // CHECK-LABEL: @test_vld4_u32(
6048 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
6049 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
6050 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6051 // CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
test_vld4_u32(uint32_t const * a)6052 uint32x2x4_t test_vld4_u32(uint32_t const * a) {
6053 return vld4_u32(a);
6054 }
6055
6056 // CHECK-LABEL: @test_vld4_u64(
6057 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
6058 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
6059 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6060 // CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
test_vld4_u64(uint64_t const * a)6061 uint64x1x4_t test_vld4_u64(uint64_t const * a) {
6062 return vld4_u64(a);
6063 }
6064
6065 // CHECK-LABEL: @test_vld4_s8(
6066 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
6067 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
6068 // CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
test_vld4_s8(int8_t const * a)6069 int8x8x4_t test_vld4_s8(int8_t const * a) {
6070 return vld4_s8(a);
6071 }
6072
6073 // CHECK-LABEL: @test_vld4_s16(
6074 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
6075 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
6076 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6077 // CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
test_vld4_s16(int16_t const * a)6078 int16x4x4_t test_vld4_s16(int16_t const * a) {
6079 return vld4_s16(a);
6080 }
6081
6082 // CHECK-LABEL: @test_vld4_s32(
6083 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
6084 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
6085 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6086 // CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
test_vld4_s32(int32_t const * a)6087 int32x2x4_t test_vld4_s32(int32_t const * a) {
6088 return vld4_s32(a);
6089 }
6090
6091 // CHECK-LABEL: @test_vld4_s64(
6092 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
6093 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
6094 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6095 // CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
test_vld4_s64(int64_t const * a)6096 int64x1x4_t test_vld4_s64(int64_t const * a) {
6097 return vld4_s64(a);
6098 }
6099
6100 // CHECK-LABEL: @test_vld4_f16(
6101 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
6102 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
6103 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
6104 // CHECK: [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
test_vld4_f16(float16_t const * a)6105 float16x4x4_t test_vld4_f16(float16_t const * a) {
6106 return vld4_f16(a);
6107 }
6108
6109 // CHECK-LABEL: @test_vld4_f32(
6110 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
6111 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
6112 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
6113 // CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
test_vld4_f32(float32_t const * a)6114 float32x2x4_t test_vld4_f32(float32_t const * a) {
6115 return vld4_f32(a);
6116 }
6117
6118 // CHECK-LABEL: @test_vld4_p8(
6119 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
6120 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
6121 // CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
test_vld4_p8(poly8_t const * a)6122 poly8x8x4_t test_vld4_p8(poly8_t const * a) {
6123 return vld4_p8(a);
6124 }
6125
6126 // CHECK-LABEL: @test_vld4_p16(
6127 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
6128 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
6129 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6130 // CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
test_vld4_p16(poly16_t const * a)6131 poly16x4x4_t test_vld4_p16(poly16_t const * a) {
6132 return vld4_p16(a);
6133 }
6134
6135 // CHECK-LABEL: @test_vld4q_lane_u16(
6136 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
6137 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
6138 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
6139 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
6140 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6141 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6142 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
6143 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
6144 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6145 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
6146 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6147 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6148 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6149 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6150 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6151 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6152 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6153 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6154 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6155 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6156 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6157 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6158 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6159 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6160 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6161 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6162 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6163 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6164 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6165 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6166 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6167 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
test_vld4q_lane_u16(uint16_t const * a,uint16x8x4_t b)6168 uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
6169 return vld4q_lane_u16(a, b, 7);
6170 }
6171
6172 // CHECK-LABEL: @test_vld4q_lane_u32(
6173 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
6174 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
6175 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
6176 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
6177 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
6178 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6179 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
6180 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
6181 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6182 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
6183 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6184 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6185 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
6186 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
6187 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
6188 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6189 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
6190 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
6191 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
6192 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6193 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
6194 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
6195 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
6196 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6197 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
6198 // CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
6199 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
6200 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6201 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6202 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6203 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
6204 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
test_vld4q_lane_u32(uint32_t const * a,uint32x4x4_t b)6205 uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
6206 return vld4q_lane_u32(a, b, 3);
6207 }
6208
6209 // CHECK-LABEL: @test_vld4q_lane_s16(
6210 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
6211 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
6212 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
6213 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
6214 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6215 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6216 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
6217 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
6218 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6219 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
6220 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6221 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6222 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6223 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6224 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6225 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6226 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6227 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6228 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6229 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6230 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6231 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6232 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6233 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6234 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6235 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6236 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6237 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6238 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6239 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6240 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6241 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
test_vld4q_lane_s16(int16_t const * a,int16x8x4_t b)6242 int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
6243 return vld4q_lane_s16(a, b, 7);
6244 }
6245
6246 // CHECK-LABEL: @test_vld4q_lane_s32(
6247 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
6248 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
6249 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
6250 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
6251 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
6252 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6253 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
6254 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
6255 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6256 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
6257 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6258 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6259 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
6260 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
6261 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
6262 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6263 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
6264 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
6265 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
6266 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6267 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
6268 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
6269 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
6270 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6271 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
6272 // CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
6273 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
6274 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6275 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6276 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6277 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
6278 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
test_vld4q_lane_s32(int32_t const * a,int32x4x4_t b)6279 int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
6280 return vld4q_lane_s32(a, b, 3);
6281 }
6282
6283 // CHECK-LABEL: @test_vld4q_lane_f16(
6284 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
6285 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
6286 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
6287 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
6288 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
6289 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6290 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
6291 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
6292 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6293 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
6294 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
6295 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6296 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
6297 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
6298 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
6299 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6300 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
6301 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
6302 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
6303 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6304 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
6305 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
6306 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
6307 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6308 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
6309 // CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
6310 // CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
6311 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
6312 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
6313 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
6314 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x half>
6315 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
test_vld4q_lane_f16(float16_t const * a,float16x8x4_t b)6316 float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
6317 return vld4q_lane_f16(a, b, 7);
6318 }
6319
6320 // CHECK-LABEL: @test_vld4q_lane_f32(
6321 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
6322 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
6323 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
6324 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
6325 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
6326 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6327 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
6328 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
6329 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6330 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
6331 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
6332 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6333 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
6334 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
6335 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
6336 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6337 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
6338 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
6339 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
6340 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6341 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
6342 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
6343 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
6344 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6345 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
6346 // CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
6347 // CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
6348 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
6349 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
6350 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
6351 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
6352 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
test_vld4q_lane_f32(float32_t const * a,float32x4x4_t b)6353 float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
6354 return vld4q_lane_f32(a, b, 3);
6355 }
6356
6357 // CHECK-LABEL: @test_vld4q_lane_p16(
6358 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
6359 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
6360 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
6361 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
6362 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6363 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6364 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
6365 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
6366 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6367 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
6368 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6369 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6370 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6371 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6372 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6373 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6374 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6375 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6376 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6377 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6378 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6379 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6380 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6381 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6382 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6383 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6384 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6385 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6386 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6387 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6388 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6389 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
test_vld4q_lane_p16(poly16_t const * a,poly16x8x4_t b)6390 poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
6391 return vld4q_lane_p16(a, b, 7);
6392 }
6393
6394 // CHECK-LABEL: @test_vld4_lane_u8(
6395 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
6396 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
6397 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
6398 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
6399 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
6400 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6401 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
6402 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
6403 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6404 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
6405 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6406 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
6407 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
6408 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6409 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
6410 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
6411 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6412 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
6413 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
6414 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6415 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
6416 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
6417 // CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
test_vld4_lane_u8(uint8_t const * a,uint8x8x4_t b)6418 uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
6419 return vld4_lane_u8(a, b, 7);
6420 }
6421
6422 // CHECK-LABEL: @test_vld4_lane_u16(
6423 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
6424 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
6425 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
6426 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
6427 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
6428 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6429 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
6430 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
6431 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6432 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
6433 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6434 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6435 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
6436 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
6437 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
6438 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6439 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
6440 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
6441 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
6442 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6443 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
6444 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
6445 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
6446 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6447 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
6448 // CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
6449 // CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
6450 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
6451 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
6452 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
6453 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
6454 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Exercises vld4_lane_u16 with the maximum lane index (3) for <4 x i16>
// elements; generated IR is pinned by the CHECK lines above.
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}
6458
6459 // CHECK-LABEL: @test_vld4_lane_u32(
6460 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
6461 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
6462 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
6463 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
6464 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
6465 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6466 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
6467 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
6468 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6469 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
6470 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6471 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6472 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
6473 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
6474 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
6475 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6476 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
6477 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
6478 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
6479 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6480 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
6481 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
6482 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
6483 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6484 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
6485 // CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
6486 // CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
6487 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
6488 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
6489 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
6490 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
6491 // CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
// Exercises vld4_lane_u32 with the maximum lane index (1) for <2 x i32>
// elements; generated IR is pinned by the CHECK lines above.
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}
6495
6496 // CHECK-LABEL: @test_vld4_lane_s8(
6497 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
6498 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
6499 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
6500 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
6501 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
6502 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6503 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
6504 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
6505 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6506 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
6507 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6508 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
6509 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
6510 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6511 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
6512 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
6513 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6514 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
6515 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
6516 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6517 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
6518 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
6519 // CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Exercises vld4_lane_s8 with the maximum lane index (7) for <8 x i8>
// elements; generated IR is pinned by the CHECK lines above.
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}
6523
6524 // CHECK-LABEL: @test_vld4_lane_s16(
6525 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
6526 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
6527 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
6528 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
6529 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
6530 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6531 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
6532 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
6533 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6534 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
6535 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6536 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6537 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
6538 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
6539 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
6540 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6541 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
6542 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
6543 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
6544 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6545 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
6546 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
6547 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
6548 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6549 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
6550 // CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
6551 // CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
6552 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
6553 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
6554 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
6555 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
6556 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Exercises vld4_lane_s16 with the maximum lane index (3) for <4 x i16>
// elements; generated IR is pinned by the CHECK lines above.
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}
6560
6561 // CHECK-LABEL: @test_vld4_lane_s32(
6562 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
6563 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
6564 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
6565 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
6566 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
6567 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6568 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
6569 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
6570 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6571 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
6572 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6573 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6574 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
6575 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
6576 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
6577 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6578 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
6579 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
6580 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
6581 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6582 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
6583 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
6584 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
6585 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6586 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
6587 // CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
6588 // CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
6589 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
6590 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
6591 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
6592 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
6593 // CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
// Exercises vld4_lane_s32 with the maximum lane index (1) for <2 x i32>
// elements; generated IR is pinned by the CHECK lines above.
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}
6597
6598 // CHECK-LABEL: @test_vld4_lane_f16(
6599 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
6600 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
6601 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
6602 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
6603 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
6604 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6605 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
6606 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
6607 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6608 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
6609 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
6610 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6611 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
6612 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
6613 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
6614 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6615 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
6616 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
6617 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
6618 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6619 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
6620 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
6621 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
6622 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6623 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
6624 // CHECK: [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
6625 // CHECK: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
6626 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
6627 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
6628 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
6629 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
6630 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
// Exercises vld4_lane_f16 with the maximum lane index (3) for <4 x half>
// elements (requires the half-argument support enabled in the RUN line);
// generated IR is pinned by the CHECK lines above.
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}
6634
6635 // CHECK-LABEL: @test_vld4_lane_f32(
6636 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
6637 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
6638 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
6639 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
6640 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
6641 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6642 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
6643 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
6644 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6645 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
6646 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
6647 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6648 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
6649 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
6650 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
6651 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6652 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
6653 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
6654 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
6655 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6656 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
6657 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
6658 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
6659 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6660 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
6661 // CHECK: [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
6662 // CHECK: [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
6663 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
6664 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
6665 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
6666 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
6667 // CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
// Exercises vld4_lane_f32 with the maximum lane index (1) for <2 x float>
// elements; generated IR is pinned by the CHECK lines above.
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}
6671
6672 // CHECK-LABEL: @test_vld4_lane_p8(
6673 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
6674 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
6675 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
6676 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
6677 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
6678 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6679 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
6680 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
6681 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6682 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
6683 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6684 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
6685 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
6686 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6687 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
6688 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
6689 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6690 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
6691 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
6692 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6693 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
6694 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
6695 // CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Exercises vld4_lane_p8 with the maximum lane index (7) for <8 x i8>
// polynomial elements; generated IR is pinned by the CHECK lines above.
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}
6699
6700 // CHECK-LABEL: @test_vld4_lane_p16(
6701 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
6702 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
6703 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
6704 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
6705 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
6706 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6707 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
6708 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
6709 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6710 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
6711 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6712 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6713 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
6714 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
6715 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
6716 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6717 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
6718 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
6719 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
6720 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6721 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
6722 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
6723 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
6724 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6725 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
6726 // CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
6727 // CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
6728 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
6729 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
6730 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
6731 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
6732 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Exercises vld4_lane_p16 with the maximum lane index (3) for <4 x i16>
// polynomial elements; generated IR is pinned by the CHECK lines above.
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}
6736
6737 // CHECK-LABEL: @test_vmax_s8(
6738 // CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
6739 // CHECK: ret <8 x i8> [[VMAX_V_I]]
// Checks that vmax_s8 lowers to @llvm.arm.neon.vmaxs.v8i8 (see CHECK above).
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}
6743
6744 // CHECK-LABEL: @test_vmax_s16(
6745 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6746 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6747 // CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
6748 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
6749 // CHECK: ret <4 x i16> [[VMAX_V2_I]]
// Checks that vmax_s16 lowers to @llvm.arm.neon.vmaxs.v4i16 (see CHECK above).
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}
6753
6754 // CHECK-LABEL: @test_vmax_s32(
6755 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6756 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6757 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
6758 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
6759 // CHECK: ret <2 x i32> [[VMAX_V2_I]]
// Checks that vmax_s32 lowers to @llvm.arm.neon.vmaxs.v2i32 (see CHECK above).
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}
6763
6764 // CHECK-LABEL: @test_vmax_u8(
6765 // CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
6766 // CHECK: ret <8 x i8> [[VMAX_V_I]]
// Checks that vmax_u8 lowers to the unsigned variant
// @llvm.arm.neon.vmaxu.v8i8 (see CHECK above).
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}
6770
6771 // CHECK-LABEL: @test_vmax_u16(
6772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6773 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6774 // CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
6775 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
6776 // CHECK: ret <4 x i16> [[VMAX_V2_I]]
// Checks that vmax_u16 lowers to @llvm.arm.neon.vmaxu.v4i16 (see CHECK above).
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}
6780
6781 // CHECK-LABEL: @test_vmax_u32(
6782 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6783 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6784 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
6785 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
6786 // CHECK: ret <2 x i32> [[VMAX_V2_I]]
// Checks that vmax_u32 lowers to @llvm.arm.neon.vmaxu.v2i32 (see CHECK above).
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}
6790
6791 // CHECK-LABEL: @test_vmax_f32(
6792 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6793 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
6794 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b)
6795 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
6796 // CHECK: ret <2 x float> [[VMAX_V2_I]]
// Checks that vmax_f32 lowers to @llvm.arm.neon.vmaxs.v2f32 (see CHECK above).
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
6800
6801 // CHECK-LABEL: @test_vmaxq_s8(
6802 // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b)
6803 // CHECK: ret <16 x i8> [[VMAXQ_V_I]]
// Q-register (128-bit) variant: vmaxq_s8 lowers to
// @llvm.arm.neon.vmaxs.v16i8 (see CHECK above).
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}
6807
6808 // CHECK-LABEL: @test_vmaxq_s16(
6809 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6810 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6811 // CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b)
6812 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
6813 // CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
// Checks that vmaxq_s16 lowers to @llvm.arm.neon.vmaxs.v8i16 (see CHECK above).
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}
6817
6818 // CHECK-LABEL: @test_vmaxq_s32(
6819 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6820 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6821 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b)
6822 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
6823 // CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
// Checks that vmaxq_s32 lowers to @llvm.arm.neon.vmaxs.v4i32 (see CHECK above).
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}
6827
6828 // CHECK-LABEL: @test_vmaxq_u8(
6829 // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b)
6830 // CHECK: ret <16 x i8> [[VMAXQ_V_I]]
// Checks that vmaxq_u8 lowers to @llvm.arm.neon.vmaxu.v16i8 (see CHECK above).
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}
6834
6835 // CHECK-LABEL: @test_vmaxq_u16(
6836 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6837 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6838 // CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b)
6839 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
6840 // CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
// Checks that vmaxq_u16 lowers to @llvm.arm.neon.vmaxu.v8i16 (see CHECK above).
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}
6844
6845 // CHECK-LABEL: @test_vmaxq_u32(
6846 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6847 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6848 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b)
6849 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
6850 // CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
// Checks that vmaxq_u32 lowers to @llvm.arm.neon.vmaxu.v4i32 (see CHECK above).
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}
6854
6855 // CHECK-LABEL: @test_vmaxq_f32(
6856 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6857 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
6858 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b)
6859 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
6860 // CHECK: ret <4 x float> [[VMAXQ_V2_I]]
// Checks that vmaxq_f32 lowers to @llvm.arm.neon.vmaxs.v4f32 (see CHECK above).
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}
6864
6865 // CHECK-LABEL: @test_vmin_s8(
6866 // CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b)
6867 // CHECK: ret <8 x i8> [[VMIN_V_I]]
// Checks that vmin_s8 lowers to @llvm.arm.neon.vmins.v8i8 (see CHECK above).
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}
6871
6872 // CHECK-LABEL: @test_vmin_s16(
6873 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6874 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6875 // CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b)
6876 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
6877 // CHECK: ret <4 x i16> [[VMIN_V2_I]]
// Checks that vmin_s16 lowers to @llvm.arm.neon.vmins.v4i16 (see CHECK above).
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}
6881
6882 // CHECK-LABEL: @test_vmin_s32(
6883 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6884 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6885 // CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b)
6886 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
6887 // CHECK: ret <2 x i32> [[VMIN_V2_I]]
test_vmin_s32(int32x2_t a,int32x2_t b)6888 int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
6889 return vmin_s32(a, b);
6890 }
6891
6892 // CHECK-LABEL: @test_vmin_u8(
6893 // CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b)
6894 // CHECK: ret <8 x i8> [[VMIN_V_I]]
test_vmin_u8(uint8x8_t a,uint8x8_t b)6895 uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
6896 return vmin_u8(a, b);
6897 }
6898
6899 // CHECK-LABEL: @test_vmin_u16(
6900 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6901 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6902 // CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b)
6903 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
6904 // CHECK: ret <4 x i16> [[VMIN_V2_I]]
test_vmin_u16(uint16x4_t a,uint16x4_t b)6905 uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
6906 return vmin_u16(a, b);
6907 }
6908
6909 // CHECK-LABEL: @test_vmin_u32(
6910 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6911 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6912 // CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b)
6913 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
6914 // CHECK: ret <2 x i32> [[VMIN_V2_I]]
test_vmin_u32(uint32x2_t a,uint32x2_t b)6915 uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
6916 return vmin_u32(a, b);
6917 }
6918
6919 // CHECK-LABEL: @test_vmin_f32(
6920 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6921 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
6922 // CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b)
6923 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
6924 // CHECK: ret <2 x float> [[VMIN_V2_I]]
test_vmin_f32(float32x2_t a,float32x2_t b)6925 float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
6926 return vmin_f32(a, b);
6927 }
6928
6929 // CHECK-LABEL: @test_vminq_s8(
6930 // CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b)
6931 // CHECK: ret <16 x i8> [[VMINQ_V_I]]
test_vminq_s8(int8x16_t a,int8x16_t b)6932 int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
6933 return vminq_s8(a, b);
6934 }
6935
6936 // CHECK-LABEL: @test_vminq_s16(
6937 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6938 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6939 // CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b)
6940 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
6941 // CHECK: ret <8 x i16> [[VMINQ_V2_I]]
test_vminq_s16(int16x8_t a,int16x8_t b)6942 int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
6943 return vminq_s16(a, b);
6944 }
6945
6946 // CHECK-LABEL: @test_vminq_s32(
6947 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6948 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6949 // CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b)
6950 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
6951 // CHECK: ret <4 x i32> [[VMINQ_V2_I]]
test_vminq_s32(int32x4_t a,int32x4_t b)6952 int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
6953 return vminq_s32(a, b);
6954 }
6955
6956 // CHECK-LABEL: @test_vminq_u8(
6957 // CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b)
6958 // CHECK: ret <16 x i8> [[VMINQ_V_I]]
test_vminq_u8(uint8x16_t a,uint8x16_t b)6959 uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
6960 return vminq_u8(a, b);
6961 }
6962
6963 // CHECK-LABEL: @test_vminq_u16(
6964 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6965 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6966 // CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b)
6967 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
6968 // CHECK: ret <8 x i16> [[VMINQ_V2_I]]
test_vminq_u16(uint16x8_t a,uint16x8_t b)6969 uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
6970 return vminq_u16(a, b);
6971 }
6972
6973 // CHECK-LABEL: @test_vminq_u32(
6974 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6975 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6976 // CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b)
6977 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
6978 // CHECK: ret <4 x i32> [[VMINQ_V2_I]]
test_vminq_u32(uint32x4_t a,uint32x4_t b)6979 uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
6980 return vminq_u32(a, b);
6981 }
6982
6983 // CHECK-LABEL: @test_vminq_f32(
6984 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6985 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
6986 // CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b)
6987 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
6988 // CHECK: ret <4 x float> [[VMINQ_V2_I]]
test_vminq_f32(float32x4_t a,float32x4_t b)6989 float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
6990 return vminq_f32(a, b);
6991 }
6992
6993 // CHECK-LABEL: @test_vmla_s8(
6994 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
6995 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
6996 // CHECK: ret <8 x i8> [[ADD_I]]
test_vmla_s8(int8x8_t a,int8x8_t b,int8x8_t c)6997 int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
6998 return vmla_s8(a, b, c);
6999 }
7000
7001 // CHECK-LABEL: @test_vmla_s16(
7002 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
7003 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
7004 // CHECK: ret <4 x i16> [[ADD_I]]
test_vmla_s16(int16x4_t a,int16x4_t b,int16x4_t c)7005 int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
7006 return vmla_s16(a, b, c);
7007 }
7008
7009 // CHECK-LABEL: @test_vmla_s32(
7010 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
7011 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
7012 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_s32(int32x2_t a,int32x2_t b,int32x2_t c)7013 int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
7014 return vmla_s32(a, b, c);
7015 }
7016
7017 // CHECK-LABEL: @test_vmla_f32(
7018 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
7019 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
7020 // CHECK: ret <2 x float> [[ADD_I]]
test_vmla_f32(float32x2_t a,float32x2_t b,float32x2_t c)7021 float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
7022 return vmla_f32(a, b, c);
7023 }
7024
7025 // CHECK-LABEL: @test_vmla_u8(
7026 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
7027 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
7028 // CHECK: ret <8 x i8> [[ADD_I]]
test_vmla_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)7029 uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
7030 return vmla_u8(a, b, c);
7031 }
7032
7033 // CHECK-LABEL: @test_vmla_u16(
7034 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
7035 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
7036 // CHECK: ret <4 x i16> [[ADD_I]]
test_vmla_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)7037 uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
7038 return vmla_u16(a, b, c);
7039 }
7040
7041 // CHECK-LABEL: @test_vmla_u32(
7042 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
7043 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
7044 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)7045 uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
7046 return vmla_u32(a, b, c);
7047 }
7048
7049 // CHECK-LABEL: @test_vmlaq_s8(
7050 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7051 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
7052 // CHECK: ret <16 x i8> [[ADD_I]]
test_vmlaq_s8(int8x16_t a,int8x16_t b,int8x16_t c)7053 int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
7054 return vmlaq_s8(a, b, c);
7055 }
7056
7057 // CHECK-LABEL: @test_vmlaq_s16(
7058 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7059 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7060 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_s16(int16x8_t a,int16x8_t b,int16x8_t c)7061 int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
7062 return vmlaq_s16(a, b, c);
7063 }
7064
7065 // CHECK-LABEL: @test_vmlaq_s32(
7066 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7067 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7068 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_s32(int32x4_t a,int32x4_t b,int32x4_t c)7069 int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
7070 return vmlaq_s32(a, b, c);
7071 }
7072
7073 // CHECK-LABEL: @test_vmlaq_f32(
7074 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
7075 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
7076 // CHECK: ret <4 x float> [[ADD_I]]
test_vmlaq_f32(float32x4_t a,float32x4_t b,float32x4_t c)7077 float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
7078 return vmlaq_f32(a, b, c);
7079 }
7080
7081 // CHECK-LABEL: @test_vmlaq_u8(
7082 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7083 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
7084 // CHECK: ret <16 x i8> [[ADD_I]]
test_vmlaq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)7085 uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
7086 return vmlaq_u8(a, b, c);
7087 }
7088
7089 // CHECK-LABEL: @test_vmlaq_u16(
7090 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7091 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7092 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)7093 uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
7094 return vmlaq_u16(a, b, c);
7095 }
7096
7097 // CHECK-LABEL: @test_vmlaq_u32(
7098 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7099 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7100 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)7101 uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
7102 return vmlaq_u32(a, b, c);
7103 }
7104
7105 // CHECK-LABEL: @test_vmlal_s8(
7106 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
7107 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7108 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlal_s8(int16x8_t a,int8x8_t b,int8x8_t c)7109 int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
7110 return vmlal_s8(a, b, c);
7111 }
7112
7113 // CHECK-LABEL: @test_vmlal_s16(
7114 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7115 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7116 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
7117 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7118 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlal_s16(int32x4_t a,int16x4_t b,int16x4_t c)7119 int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
7120 return vmlal_s16(a, b, c);
7121 }
7122
7123 // CHECK-LABEL: @test_vmlal_s32(
7124 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7125 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7126 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
7127 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7128 // CHECK: ret <2 x i64> [[ADD_I]]
test_vmlal_s32(int64x2_t a,int32x2_t b,int32x2_t c)7129 int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
7130 return vmlal_s32(a, b, c);
7131 }
7132
7133 // CHECK-LABEL: @test_vmlal_u8(
7134 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
7135 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7136 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlal_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)7137 uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
7138 return vmlal_u8(a, b, c);
7139 }
7140
7141 // CHECK-LABEL: @test_vmlal_u16(
7142 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7143 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7144 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
7145 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7146 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlal_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)7147 uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
7148 return vmlal_u16(a, b, c);
7149 }
7150
7151 // CHECK-LABEL: @test_vmlal_u32(
7152 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7153 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7154 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
7155 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7156 // CHECK: ret <2 x i64> [[ADD_I]]
test_vmlal_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)7157 uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
7158 return vmlal_u32(a, b, c);
7159 }
7160
7161 // CHECK-LABEL: @test_vmlal_lane_s16(
7162 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7163 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7164 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7165 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7166 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
7167 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
7168 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
7169 // CHECK: ret <4 x i32> [[ADD]]
test_vmlal_lane_s16(int32x4_t a,int16x4_t b,int16x4_t c)7170 int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
7171 return vmlal_lane_s16(a, b, c, 3);
7172 }
7173
7174 // CHECK-LABEL: @test_vmlal_lane_s32(
7175 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7176 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7177 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7178 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7179 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
7180 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
7181 // CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
7182 // CHECK: ret <2 x i64> [[ADD]]
test_vmlal_lane_s32(int64x2_t a,int32x2_t b,int32x2_t c)7183 int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
7184 return vmlal_lane_s32(a, b, c, 1);
7185 }
7186
7187 // CHECK-LABEL: @test_vmlal_lane_u16(
7188 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7189 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7190 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7191 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7192 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
7193 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
7194 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
7195 // CHECK: ret <4 x i32> [[ADD]]
test_vmlal_lane_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)7196 uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
7197 return vmlal_lane_u16(a, b, c, 3);
7198 }
7199
7200 // CHECK-LABEL: @test_vmlal_lane_u32(
7201 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7202 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7203 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7204 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7205 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
7206 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
7207 // CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
7208 // CHECK: ret <2 x i64> [[ADD]]
test_vmlal_lane_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)7209 uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
7210 return vmlal_lane_u32(a, b, c, 1);
7211 }
7212
7213 // CHECK-LABEL: @test_vmlal_n_s16(
7214 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7215 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7216 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7217 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7218 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7219 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7220 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7221 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7222 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlal_n_s16(int32x4_t a,int16x4_t b,int16_t c)7223 int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
7224 return vmlal_n_s16(a, b, c);
7225 }
7226
7227 // CHECK-LABEL: @test_vmlal_n_s32(
7228 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7229 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7230 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7231 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7232 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7233 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7234 // CHECK: ret <2 x i64> [[ADD_I]]
test_vmlal_n_s32(int64x2_t a,int32x2_t b,int32_t c)7235 int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
7236 return vmlal_n_s32(a, b, c);
7237 }
7238
7239 // CHECK-LABEL: @test_vmlal_n_u16(
7240 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7241 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7242 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7243 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7244 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7245 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7246 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7247 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7248 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlal_n_u16(uint32x4_t a,uint16x4_t b,uint16_t c)7249 uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
7250 return vmlal_n_u16(a, b, c);
7251 }
7252
7253 // CHECK-LABEL: @test_vmlal_n_u32(
7254 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7255 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7256 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7257 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7258 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7259 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7260 // CHECK: ret <2 x i64> [[ADD_I]]
test_vmlal_n_u32(uint64x2_t a,uint32x2_t b,uint32_t c)7261 uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
7262 return vmlal_n_u32(a, b, c);
7263 }
7264
7265 // CHECK-LABEL: @test_vmla_lane_s16(
7266 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7267 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7268 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7269 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7270 // CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
7271 // CHECK: ret <4 x i16> [[ADD]]
test_vmla_lane_s16(int16x4_t a,int16x4_t b,int16x4_t c)7272 int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
7273 return vmla_lane_s16(a, b, c, 3);
7274 }
7275
7276 // CHECK-LABEL: @test_vmla_lane_s32(
7277 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7278 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7279 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7280 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7281 // CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
7282 // CHECK: ret <2 x i32> [[ADD]]
test_vmla_lane_s32(int32x2_t a,int32x2_t b,int32x2_t c)7283 int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
7284 return vmla_lane_s32(a, b, c, 1);
7285 }
7286
7287 // CHECK-LABEL: @test_vmla_lane_u16(
7288 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7289 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7290 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7291 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7292 // CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
7293 // CHECK: ret <4 x i16> [[ADD]]
test_vmla_lane_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)7294 uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
7295 return vmla_lane_u16(a, b, c, 3);
7296 }
7297
7298 // CHECK-LABEL: @test_vmla_lane_u32(
7299 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7300 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7301 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7302 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7303 // CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
7304 // CHECK: ret <2 x i32> [[ADD]]
test_vmla_lane_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)7305 uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
7306 return vmla_lane_u32(a, b, c, 1);
7307 }
7308
7309 // CHECK-LABEL: @test_vmla_lane_f32(
7310 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7311 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7312 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
7313 // CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
7314 // CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
7315 // CHECK: ret <2 x float> [[ADD]]
test_vmla_lane_f32(float32x2_t a,float32x2_t b,float32x2_t c)7316 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
7317 return vmla_lane_f32(a, b, c, 1);
7318 }
7319
7320 // CHECK-LABEL: @test_vmlaq_lane_s16(
7321 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7322 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7323 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7324 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7325 // CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
7326 // CHECK: ret <8 x i16> [[ADD]]
test_vmlaq_lane_s16(int16x8_t a,int16x8_t b,int16x4_t c)7327 int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
7328 return vmlaq_lane_s16(a, b, c, 3);
7329 }
7330
7331 // CHECK-LABEL: @test_vmlaq_lane_s32(
7332 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7333 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7334 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7335 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7336 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
7337 // CHECK: ret <4 x i32> [[ADD]]
test_vmlaq_lane_s32(int32x4_t a,int32x4_t b,int32x2_t c)7338 int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
7339 return vmlaq_lane_s32(a, b, c, 1);
7340 }
7341
7342 // CHECK-LABEL: @test_vmlaq_lane_u16(
7343 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7344 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7345 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7346 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7347 // CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
7348 // CHECK: ret <8 x i16> [[ADD]]
test_vmlaq_lane_u16(uint16x8_t a,uint16x8_t b,uint16x4_t c)7349 uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
7350 return vmlaq_lane_u16(a, b, c, 3);
7351 }
7352
7353 // CHECK-LABEL: @test_vmlaq_lane_u32(
7354 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7355 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7356 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7357 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7358 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
7359 // CHECK: ret <4 x i32> [[ADD]]
test_vmlaq_lane_u32(uint32x4_t a,uint32x4_t b,uint32x2_t c)7360 uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
7361 return vmlaq_lane_u32(a, b, c, 1);
7362 }
7363
7364 // CHECK-LABEL: @test_vmlaq_lane_f32(
7365 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7366 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7367 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7368 // CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
7369 // CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
7370 // CHECK: ret <4 x float> [[ADD]]
test_vmlaq_lane_f32(float32x4_t a,float32x4_t b,float32x2_t c)7371 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
7372 return vmlaq_lane_f32(a, b, c, 1);
7373 }
7374
7375 // CHECK-LABEL: @test_vmla_n_s16(
7376 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7377 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7378 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7379 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7380 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7381 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
7382 // CHECK: ret <4 x i16> [[ADD_I]]
test_vmla_n_s16(int16x4_t a,int16x4_t b,int16_t c)7383 int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
7384 return vmla_n_s16(a, b, c);
7385 }
7386
7387 // CHECK-LABEL: @test_vmla_n_s32(
7388 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7389 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7390 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7391 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
7392 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_n_s32(int32x2_t a,int32x2_t b,int32_t c)7393 int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
7394 return vmla_n_s32(a, b, c);
7395 }
7396
7397 // CHECK-LABEL: @test_vmla_n_u16(
7398 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7399 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7400 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7401 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7402 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7403 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
7404 // CHECK: ret <4 x i16> [[ADD_I]]
test_vmla_n_u16(uint16x4_t a,uint16x4_t b,uint16_t c)7405 uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
7406 return vmla_n_u16(a, b, c);
7407 }
7408
7409 // CHECK-LABEL: @test_vmla_n_u32(
7410 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7411 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7412 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7413 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
7414 // CHECK: ret <2 x i32> [[ADD_I]]
test_vmla_n_u32(uint32x2_t a,uint32x2_t b,uint32_t c)7415 uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
7416 return vmla_n_u32(a, b, c);
7417 }
7418
7419 // CHECK-LABEL: @test_vmla_n_f32(
7420 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
7421 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
7422 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
7423 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
7424 // CHECK: ret <2 x float> [[ADD_I]]
test_vmla_n_f32(float32x2_t a,float32x2_t b,float32_t c)7425 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
7426 return vmla_n_f32(a, b, c);
7427 }
7428
// NOTE(review): vmlaq_n_* tests — multiply-accumulate with a scalar: the CHECK
// lines show c splatted via insertelement, then mul/fmul with %b and add/fadd
// into %a. These CHECK expectations appear auto-generated (see the RUN lines at
// the top of the file); regenerate them with the test-update tooling rather
// than hand-editing code or CHECK text, as the IR value names are load-bearing.
7429 // CHECK-LABEL: @test_vmlaq_n_s16(
7430 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7431 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7432 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7433 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7434 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7435 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7436 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7437 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7438 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7439 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7440 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_n_s16(int16x8_t a,int16x8_t b,int16_t c)7441 int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
7442 return vmlaq_n_s16(a, b, c);
7443 }
7444
7445 // CHECK-LABEL: @test_vmlaq_n_s32(
7446 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7447 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7448 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7449 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7450 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7451 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7452 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_n_s32(int32x4_t a,int32x4_t b,int32_t c)7453 int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
7454 return vmlaq_n_s32(a, b, c);
7455 }
7456
7457 // CHECK-LABEL: @test_vmlaq_n_u16(
7458 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7459 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7460 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7461 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7462 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7463 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7464 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7465 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7466 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7467 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7468 // CHECK: ret <8 x i16> [[ADD_I]]
test_vmlaq_n_u16(uint16x8_t a,uint16x8_t b,uint16_t c)7469 uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
7470 return vmlaq_n_u16(a, b, c);
7471 }
7472
7473 // CHECK-LABEL: @test_vmlaq_n_u32(
7474 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7475 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7476 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7477 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7478 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7479 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7480 // CHECK: ret <4 x i32> [[ADD_I]]
test_vmlaq_n_u32(uint32x4_t a,uint32x4_t b,uint32_t c)7481 uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
7482 return vmlaq_n_u32(a, b, c);
7483 }
7484
7485 // CHECK-LABEL: @test_vmlaq_n_f32(
7486 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
7487 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
7488 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
7489 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
7490 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
7491 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
7492 // CHECK: ret <4 x float> [[ADD_I]]
test_vmlaq_n_f32(float32x4_t a,float32x4_t b,float32_t c)7493 float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
7494 return vmlaq_n_f32(a, b, c);
7495 }
7496
// NOTE(review): vmls_* / vmlsq_* element-wise tests — multiply-subtract:
// a - (b * c), lowered to plain mul/sub (fmul/fsub for float) on the 64-bit
// (vmls) and 128-bit (vmlsq) vector types. CHECK expectations appear
// auto-generated; keep code and CHECK lines in lockstep via the update script.
7497 // CHECK-LABEL: @test_vmls_s8(
7498 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
7499 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
7500 // CHECK: ret <8 x i8> [[SUB_I]]
test_vmls_s8(int8x8_t a,int8x8_t b,int8x8_t c)7501 int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
7502 return vmls_s8(a, b, c);
7503 }
7504
7505 // CHECK-LABEL: @test_vmls_s16(
7506 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
7507 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7508 // CHECK: ret <4 x i16> [[SUB_I]]
test_vmls_s16(int16x4_t a,int16x4_t b,int16x4_t c)7509 int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
7510 return vmls_s16(a, b, c);
7511 }
7512
7513 // CHECK-LABEL: @test_vmls_s32(
7514 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
7515 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7516 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_s32(int32x2_t a,int32x2_t b,int32x2_t c)7517 int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
7518 return vmls_s32(a, b, c);
7519 }
7520
7521 // CHECK-LABEL: @test_vmls_f32(
7522 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
7523 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
7524 // CHECK: ret <2 x float> [[SUB_I]]
test_vmls_f32(float32x2_t a,float32x2_t b,float32x2_t c)7525 float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
7526 return vmls_f32(a, b, c);
7527 }
7528
7529 // CHECK-LABEL: @test_vmls_u8(
7530 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
7531 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
7532 // CHECK: ret <8 x i8> [[SUB_I]]
test_vmls_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)7533 uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
7534 return vmls_u8(a, b, c);
7535 }
7536
7537 // CHECK-LABEL: @test_vmls_u16(
7538 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
7539 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7540 // CHECK: ret <4 x i16> [[SUB_I]]
test_vmls_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)7541 uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
7542 return vmls_u16(a, b, c);
7543 }
7544
7545 // CHECK-LABEL: @test_vmls_u32(
7546 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
7547 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7548 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)7549 uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
7550 return vmls_u32(a, b, c);
7551 }
7552
7553 // CHECK-LABEL: @test_vmlsq_s8(
7554 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7555 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
7556 // CHECK: ret <16 x i8> [[SUB_I]]
test_vmlsq_s8(int8x16_t a,int8x16_t b,int8x16_t c)7557 int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
7558 return vmlsq_s8(a, b, c);
7559 }
7560
7561 // CHECK-LABEL: @test_vmlsq_s16(
7562 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7563 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7564 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_s16(int16x8_t a,int16x8_t b,int16x8_t c)7565 int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
7566 return vmlsq_s16(a, b, c);
7567 }
7568
7569 // CHECK-LABEL: @test_vmlsq_s32(
7570 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7571 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7572 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_s32(int32x4_t a,int32x4_t b,int32x4_t c)7573 int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
7574 return vmlsq_s32(a, b, c);
7575 }
7576
7577 // CHECK-LABEL: @test_vmlsq_f32(
7578 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
7579 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
7580 // CHECK: ret <4 x float> [[SUB_I]]
test_vmlsq_f32(float32x4_t a,float32x4_t b,float32x4_t c)7581 float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
7582 return vmlsq_f32(a, b, c);
7583 }
7584
7585 // CHECK-LABEL: @test_vmlsq_u8(
7586 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7587 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
7588 // CHECK: ret <16 x i8> [[SUB_I]]
test_vmlsq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)7589 uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
7590 return vmlsq_u8(a, b, c);
7591 }
7592
7593 // CHECK-LABEL: @test_vmlsq_u16(
7594 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7595 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7596 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)7597 uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
7598 return vmlsq_u16(a, b, c);
7599 }
7600
7601 // CHECK-LABEL: @test_vmlsq_u32(
7602 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7603 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7604 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)7605 uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
7606 return vmlsq_u32(a, b, c);
7607 }
7608
// NOTE(review): vmlsl_* widening multiply-subtract tests — the product is
// computed via the @llvm.arm.neon.vmulls/vmullu intrinsics (signed/unsigned
// widening multiply) and subtracted from the wider accumulator %a. The _lane
// variants splat one lane of c with shufflevector first; the _n variants splat
// the scalar c with insertelement. CHECK lines appear auto-generated — do not
// hand-edit; regenerate with the test-update tooling.
7609 // CHECK-LABEL: @test_vmlsl_s8(
7610 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
7611 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
7612 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsl_s8(int16x8_t a,int8x8_t b,int8x8_t c)7613 int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
7614 return vmlsl_s8(a, b, c);
7615 }
7616
7617 // CHECK-LABEL: @test_vmlsl_s16(
7618 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7619 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7620 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
7621 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7622 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsl_s16(int32x4_t a,int16x4_t b,int16x4_t c)7623 int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
7624 return vmlsl_s16(a, b, c);
7625 }
7626
7627 // CHECK-LABEL: @test_vmlsl_s32(
7628 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7629 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7630 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
7631 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7632 // CHECK: ret <2 x i64> [[SUB_I]]
test_vmlsl_s32(int64x2_t a,int32x2_t b,int32x2_t c)7633 int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
7634 return vmlsl_s32(a, b, c);
7635 }
7636
7637 // CHECK-LABEL: @test_vmlsl_u8(
7638 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
7639 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
7640 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsl_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)7641 uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
7642 return vmlsl_u8(a, b, c);
7643 }
7644
7645 // CHECK-LABEL: @test_vmlsl_u16(
7646 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7647 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7648 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
7649 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7650 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsl_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)7651 uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
7652 return vmlsl_u16(a, b, c);
7653 }
7654
7655 // CHECK-LABEL: @test_vmlsl_u32(
7656 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7657 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7658 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
7659 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7660 // CHECK: ret <2 x i64> [[SUB_I]]
test_vmlsl_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)7661 uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
7662 return vmlsl_u32(a, b, c);
7663 }
7664
7665 // CHECK-LABEL: @test_vmlsl_lane_s16(
7666 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7667 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7668 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7669 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7670 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
7671 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
7672 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
7673 // CHECK: ret <4 x i32> [[SUB]]
test_vmlsl_lane_s16(int32x4_t a,int16x4_t b,int16x4_t c)7674 int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
7675 return vmlsl_lane_s16(a, b, c, 3);
7676 }
7677
7678 // CHECK-LABEL: @test_vmlsl_lane_s32(
7679 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7680 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7681 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7682 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7683 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
7684 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
7685 // CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
7686 // CHECK: ret <2 x i64> [[SUB]]
test_vmlsl_lane_s32(int64x2_t a,int32x2_t b,int32x2_t c)7687 int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
7688 return vmlsl_lane_s32(a, b, c, 1);
7689 }
7690
7691 // CHECK-LABEL: @test_vmlsl_lane_u16(
7692 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7693 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7694 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7695 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7696 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
7697 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
7698 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
7699 // CHECK: ret <4 x i32> [[SUB]]
test_vmlsl_lane_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)7700 uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
7701 return vmlsl_lane_u16(a, b, c, 3);
7702 }
7703
7704 // CHECK-LABEL: @test_vmlsl_lane_u32(
7705 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7706 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7707 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7708 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7709 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
7710 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
7711 // CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
7712 // CHECK: ret <2 x i64> [[SUB]]
test_vmlsl_lane_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)7713 uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
7714 return vmlsl_lane_u32(a, b, c, 1);
7715 }
7716
7717 // CHECK-LABEL: @test_vmlsl_n_s16(
7718 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7719 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7720 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7721 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7722 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7723 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7724 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7725 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7726 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsl_n_s16(int32x4_t a,int16x4_t b,int16_t c)7727 int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
7728 return vmlsl_n_s16(a, b, c);
7729 }
7730
7731 // CHECK-LABEL: @test_vmlsl_n_s32(
7732 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7733 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7734 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7735 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7736 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7737 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7738 // CHECK: ret <2 x i64> [[SUB_I]]
test_vmlsl_n_s32(int64x2_t a,int32x2_t b,int32_t c)7739 int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
7740 return vmlsl_n_s32(a, b, c);
7741 }
7742
7743 // CHECK-LABEL: @test_vmlsl_n_u16(
7744 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7745 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7746 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7747 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7748 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7749 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7750 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7751 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7752 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsl_n_u16(uint32x4_t a,uint16x4_t b,uint16_t c)7753 uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
7754 return vmlsl_n_u16(a, b, c);
7755 }
7756
7757 // CHECK-LABEL: @test_vmlsl_n_u32(
7758 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7759 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7760 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7761 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7762 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7763 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7764 // CHECK: ret <2 x i64> [[SUB_I]]
test_vmlsl_n_u32(uint64x2_t a,uint32x2_t b,uint32_t c)7765 uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
7766 return vmlsl_n_u32(a, b, c);
7767 }
7768
// NOTE(review): vmls_lane_* / vmlsq_lane_* tests — one lane of c is broadcast
// with shufflevector (lane index fixed by the constant in the call below each
// CHECK block), multiplied element-wise with b, and subtracted from a. CHECK
// expectations appear auto-generated; regenerate rather than hand-edit.
7769 // CHECK-LABEL: @test_vmls_lane_s16(
7770 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7771 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7772 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7773 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7774 // CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
7775 // CHECK: ret <4 x i16> [[SUB]]
test_vmls_lane_s16(int16x4_t a,int16x4_t b,int16x4_t c)7776 int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
7777 return vmls_lane_s16(a, b, c, 3);
7778 }
7779
7780 // CHECK-LABEL: @test_vmls_lane_s32(
7781 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7782 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7783 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7784 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7785 // CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
7786 // CHECK: ret <2 x i32> [[SUB]]
test_vmls_lane_s32(int32x2_t a,int32x2_t b,int32x2_t c)7787 int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
7788 return vmls_lane_s32(a, b, c, 1);
7789 }
7790
7791 // CHECK-LABEL: @test_vmls_lane_u16(
7792 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7793 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7794 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7795 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7796 // CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
7797 // CHECK: ret <4 x i16> [[SUB]]
test_vmls_lane_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)7798 uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
7799 return vmls_lane_u16(a, b, c, 3);
7800 }
7801
7802 // CHECK-LABEL: @test_vmls_lane_u32(
7803 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7804 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7805 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7806 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7807 // CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
7808 // CHECK: ret <2 x i32> [[SUB]]
test_vmls_lane_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)7809 uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
7810 return vmls_lane_u32(a, b, c, 1);
7811 }
7812
7813 // CHECK-LABEL: @test_vmls_lane_f32(
7814 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7815 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7816 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
7817 // CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
7818 // CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
7819 // CHECK: ret <2 x float> [[SUB]]
test_vmls_lane_f32(float32x2_t a,float32x2_t b,float32x2_t c)7820 float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
7821 return vmls_lane_f32(a, b, c, 1);
7822 }
7823
7824 // CHECK-LABEL: @test_vmlsq_lane_s16(
7825 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7826 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7827 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7828 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7829 // CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
7830 // CHECK: ret <8 x i16> [[SUB]]
test_vmlsq_lane_s16(int16x8_t a,int16x8_t b,int16x4_t c)7831 int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
7832 return vmlsq_lane_s16(a, b, c, 3);
7833 }
7834
7835 // CHECK-LABEL: @test_vmlsq_lane_s32(
7836 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7837 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7838 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7839 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7840 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
7841 // CHECK: ret <4 x i32> [[SUB]]
test_vmlsq_lane_s32(int32x4_t a,int32x4_t b,int32x2_t c)7842 int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
7843 return vmlsq_lane_s32(a, b, c, 1);
7844 }
7845
7846 // CHECK-LABEL: @test_vmlsq_lane_u16(
7847 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7848 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7849 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7850 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7851 // CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
7852 // CHECK: ret <8 x i16> [[SUB]]
test_vmlsq_lane_u16(uint16x8_t a,uint16x8_t b,uint16x4_t c)7853 uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
7854 return vmlsq_lane_u16(a, b, c, 3);
7855 }
7856
7857 // CHECK-LABEL: @test_vmlsq_lane_u32(
7858 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7859 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7860 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7861 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7862 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
7863 // CHECK: ret <4 x i32> [[SUB]]
test_vmlsq_lane_u32(uint32x4_t a,uint32x4_t b,uint32x2_t c)7864 uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
7865 return vmlsq_lane_u32(a, b, c, 1);
7866 }
7867
7868 // CHECK-LABEL: @test_vmlsq_lane_f32(
7869 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7870 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7871 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7872 // CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
7873 // CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
7874 // CHECK: ret <4 x float> [[SUB]]
test_vmlsq_lane_f32(float32x4_t a,float32x4_t b,float32x2_t c)7875 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
7876 return vmlsq_lane_f32(a, b, c, 1);
7877 }
7878
// NOTE(review): vmls_n_* / vmlsq_n_* tests — scalar c is splatted with an
// insertelement chain, multiplied element-wise with b (mul/fmul), and
// subtracted from a (sub/fsub). CHECK expectations appear auto-generated;
// regenerate with the test-update tooling rather than hand-editing.
7879 // CHECK-LABEL: @test_vmls_n_s16(
7880 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7881 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7882 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7883 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7884 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7885 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7886 // CHECK: ret <4 x i16> [[SUB_I]]
test_vmls_n_s16(int16x4_t a,int16x4_t b,int16_t c)7887 int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
7888 return vmls_n_s16(a, b, c);
7889 }
7890
7891 // CHECK-LABEL: @test_vmls_n_s32(
7892 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7893 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7894 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7895 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7896 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_n_s32(int32x2_t a,int32x2_t b,int32_t c)7897 int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
7898 return vmls_n_s32(a, b, c);
7899 }
7900
7901 // CHECK-LABEL: @test_vmls_n_u16(
7902 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7903 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7904 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7905 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7906 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7907 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7908 // CHECK: ret <4 x i16> [[SUB_I]]
test_vmls_n_u16(uint16x4_t a,uint16x4_t b,uint16_t c)7909 uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
7910 return vmls_n_u16(a, b, c);
7911 }
7912
7913 // CHECK-LABEL: @test_vmls_n_u32(
7914 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7915 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7916 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7917 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7918 // CHECK: ret <2 x i32> [[SUB_I]]
test_vmls_n_u32(uint32x2_t a,uint32x2_t b,uint32_t c)7919 uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
7920 return vmls_n_u32(a, b, c);
7921 }
7922
7923 // CHECK-LABEL: @test_vmls_n_f32(
7924 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
7925 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
7926 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
7927 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
7928 // CHECK: ret <2 x float> [[SUB_I]]
test_vmls_n_f32(float32x2_t a,float32x2_t b,float32_t c)7929 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
7930 return vmls_n_f32(a, b, c);
7931 }
7932
7933 // CHECK-LABEL: @test_vmlsq_n_s16(
7934 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7935 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7936 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7937 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7938 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7939 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7940 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7941 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7942 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7943 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7944 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_n_s16(int16x8_t a,int16x8_t b,int16_t c)7945 int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
7946 return vmlsq_n_s16(a, b, c);
7947 }
7948
7949 // CHECK-LABEL: @test_vmlsq_n_s32(
7950 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7951 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7952 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7953 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7954 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7955 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7956 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_n_s32(int32x4_t a,int32x4_t b,int32_t c)7957 int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
7958 return vmlsq_n_s32(a, b, c);
7959 }
7960
7961 // CHECK-LABEL: @test_vmlsq_n_u16(
7962 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7963 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7964 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7965 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7966 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7967 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7968 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7969 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7970 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7971 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7972 // CHECK: ret <8 x i16> [[SUB_I]]
test_vmlsq_n_u16(uint16x8_t a,uint16x8_t b,uint16_t c)7973 uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
7974 return vmlsq_n_u16(a, b, c);
7975 }
7976
7977 // CHECK-LABEL: @test_vmlsq_n_u32(
7978 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7979 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7980 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7981 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7982 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7983 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7984 // CHECK: ret <4 x i32> [[SUB_I]]
test_vmlsq_n_u32(uint32x4_t a,uint32x4_t b,uint32_t c)7985 uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
7986 return vmlsq_n_u32(a, b, c);
7987 }
7988
7989 // CHECK-LABEL: @test_vmlsq_n_f32(
7990 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
7991 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
7992 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
7993 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
7994 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
7995 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
7996 // CHECK: ret <4 x float> [[SUB_I]]
test_vmlsq_n_f32(float32x4_t a,float32x4_t b,float32_t c)7997 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
7998 return vmlsq_n_f32(a, b, c);
7999 }
8000
// NOTE(review): vmovl_* / vmovn_* tests — vector lengthen lowers to a plain
// sext (signed) or zext (unsigned) to the double-width element type; vector
// narrow lowers to a trunc to the half-width element type. CHECK expectations
// appear auto-generated; regenerate rather than hand-editing.
8001 // CHECK-LABEL: @test_vmovl_s8(
8002 // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
8003 // CHECK: ret <8 x i16> [[VMOVL_I]]
test_vmovl_s8(int8x8_t a)8004 int16x8_t test_vmovl_s8(int8x8_t a) {
8005 return vmovl_s8(a);
8006 }
8007
8008 // CHECK-LABEL: @test_vmovl_s16(
8009 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8010 // CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
8011 // CHECK: ret <4 x i32> [[VMOVL_I]]
test_vmovl_s16(int16x4_t a)8012 int32x4_t test_vmovl_s16(int16x4_t a) {
8013 return vmovl_s16(a);
8014 }
8015
8016 // CHECK-LABEL: @test_vmovl_s32(
8017 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8018 // CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
8019 // CHECK: ret <2 x i64> [[VMOVL_I]]
test_vmovl_s32(int32x2_t a)8020 int64x2_t test_vmovl_s32(int32x2_t a) {
8021 return vmovl_s32(a);
8022 }
8023
8024 // CHECK-LABEL: @test_vmovl_u8(
8025 // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
8026 // CHECK: ret <8 x i16> [[VMOVL_I]]
test_vmovl_u8(uint8x8_t a)8027 uint16x8_t test_vmovl_u8(uint8x8_t a) {
8028 return vmovl_u8(a);
8029 }
8030
8031 // CHECK-LABEL: @test_vmovl_u16(
8032 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8033 // CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
8034 // CHECK: ret <4 x i32> [[VMOVL_I]]
test_vmovl_u16(uint16x4_t a)8035 uint32x4_t test_vmovl_u16(uint16x4_t a) {
8036 return vmovl_u16(a);
8037 }
8038
8039 // CHECK-LABEL: @test_vmovl_u32(
8040 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8041 // CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
8042 // CHECK: ret <2 x i64> [[VMOVL_I]]
test_vmovl_u32(uint32x2_t a)8043 uint64x2_t test_vmovl_u32(uint32x2_t a) {
8044 return vmovl_u32(a);
8045 }
8046
8047 // CHECK-LABEL: @test_vmovn_s16(
8048 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8049 // CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
8050 // CHECK: ret <8 x i8> [[VMOVN_I]]
test_vmovn_s16(int16x8_t a)8051 int8x8_t test_vmovn_s16(int16x8_t a) {
8052 return vmovn_s16(a);
8053 }
8054
8055 // CHECK-LABEL: @test_vmovn_s32(
8056 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8057 // CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
8058 // CHECK: ret <4 x i16> [[VMOVN_I]]
test_vmovn_s32(int32x4_t a)8059 int16x4_t test_vmovn_s32(int32x4_t a) {
8060 return vmovn_s32(a);
8061 }
8062
8063 // CHECK-LABEL: @test_vmovn_s64(
8064 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8065 // CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
8066 // CHECK: ret <2 x i32> [[VMOVN_I]]
test_vmovn_s64(int64x2_t a)8067 int32x2_t test_vmovn_s64(int64x2_t a) {
8068 return vmovn_s64(a);
8069 }
8070
8071 // CHECK-LABEL: @test_vmovn_u16(
8072 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8073 // CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
8074 // CHECK: ret <8 x i8> [[VMOVN_I]]
test_vmovn_u16(uint16x8_t a)8075 uint8x8_t test_vmovn_u16(uint16x8_t a) {
8076 return vmovn_u16(a);
8077 }
8078
8079 // CHECK-LABEL: @test_vmovn_u32(
8080 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8081 // CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
8082 // CHECK: ret <4 x i16> [[VMOVN_I]]
test_vmovn_u32(uint32x4_t a)8083 uint16x4_t test_vmovn_u32(uint32x4_t a) {
8084 return vmovn_u32(a);
8085 }
8086
8087 // CHECK-LABEL: @test_vmovn_u64(
8088 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8089 // CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
8090 // CHECK: ret <2 x i32> [[VMOVN_I]]
test_vmovn_u64(uint64x2_t a)8091 uint32x2_t test_vmovn_u64(uint64x2_t a) {
8092 return vmovn_u64(a);
8093 }
8094
8095 // CHECK-LABEL: @test_vmov_n_u8(
8096 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8097 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8098 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8099 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8100 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8101 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8102 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8103 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8104 // CHECK: ret <8 x i8> [[VECINIT7_I]]
test_vmov_n_u8(uint8_t a)8105 uint8x8_t test_vmov_n_u8(uint8_t a) {
8106 return vmov_n_u8(a);
8107 }
8108
8109 // CHECK-LABEL: @test_vmov_n_u16(
8110 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8111 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8112 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8113 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8114 // CHECK: ret <4 x i16> [[VECINIT3_I]]
test_vmov_n_u16(uint16_t a)8115 uint16x4_t test_vmov_n_u16(uint16_t a) {
8116 return vmov_n_u16(a);
8117 }
8118
8119 // CHECK-LABEL: @test_vmov_n_u32(
8120 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
8121 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
8122 // CHECK: ret <2 x i32> [[VECINIT1_I]]
test_vmov_n_u32(uint32_t a)8123 uint32x2_t test_vmov_n_u32(uint32_t a) {
8124 return vmov_n_u32(a);
8125 }
8126
8127 // CHECK-LABEL: @test_vmov_n_s8(
8128 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8129 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8130 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8131 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8132 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8133 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8134 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8135 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8136 // CHECK: ret <8 x i8> [[VECINIT7_I]]
test_vmov_n_s8(int8_t a)8137 int8x8_t test_vmov_n_s8(int8_t a) {
8138 return vmov_n_s8(a);
8139 }
8140
8141 // CHECK-LABEL: @test_vmov_n_s16(
8142 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8143 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8144 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8145 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8146 // CHECK: ret <4 x i16> [[VECINIT3_I]]
test_vmov_n_s16(int16_t a)8147 int16x4_t test_vmov_n_s16(int16_t a) {
8148 return vmov_n_s16(a);
8149 }
8150
8151 // CHECK-LABEL: @test_vmov_n_s32(
8152 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
8153 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
8154 // CHECK: ret <2 x i32> [[VECINIT1_I]]
test_vmov_n_s32(int32_t a)8155 int32x2_t test_vmov_n_s32(int32_t a) {
8156 return vmov_n_s32(a);
8157 }
8158
8159 // CHECK-LABEL: @test_vmov_n_p8(
8160 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8161 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8162 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8163 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8164 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8165 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8166 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8167 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8168 // CHECK: ret <8 x i8> [[VECINIT7_I]]
test_vmov_n_p8(poly8_t a)8169 poly8x8_t test_vmov_n_p8(poly8_t a) {
8170 return vmov_n_p8(a);
8171 }
8172
8173 // CHECK-LABEL: @test_vmov_n_p16(
8174 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8175 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8176 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8177 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8178 // CHECK: ret <4 x i16> [[VECINIT3_I]]
test_vmov_n_p16(poly16_t a)8179 poly16x4_t test_vmov_n_p16(poly16_t a) {
8180 return vmov_n_p16(a);
8181 }
8182
8183 // CHECK-LABEL: @test_vmov_n_f16(
8184 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
8185 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
8186 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
8187 // CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
8188 // CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
8189 // CHECK: ret <4 x half> [[VECINIT3]]
test_vmov_n_f16(float16_t * a)8190 float16x4_t test_vmov_n_f16(float16_t *a) {
8191 return vmov_n_f16(*a);
8192 }
8193
8194 // CHECK-LABEL: @test_vmov_n_f32(
8195 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
8196 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
8197 // CHECK: ret <2 x float> [[VECINIT1_I]]
test_vmov_n_f32(float32_t a)8198 float32x2_t test_vmov_n_f32(float32_t a) {
8199 return vmov_n_f32(a);
8200 }
8201
8202 // CHECK-LABEL: @test_vmovq_n_u8(
8203 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8204 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8205 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8206 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8207 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8208 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8209 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8210 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8211 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8212 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8213 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8214 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8215 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8216 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8217 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8218 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8219 // CHECK: ret <16 x i8> [[VECINIT15_I]]
test_vmovq_n_u8(uint8_t a)8220 uint8x16_t test_vmovq_n_u8(uint8_t a) {
8221 return vmovq_n_u8(a);
8222 }
8223
8224 // CHECK-LABEL: @test_vmovq_n_u16(
8225 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8226 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8227 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8228 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8229 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8230 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8231 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8232 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8233 // CHECK: ret <8 x i16> [[VECINIT7_I]]
test_vmovq_n_u16(uint16_t a)8234 uint16x8_t test_vmovq_n_u16(uint16_t a) {
8235 return vmovq_n_u16(a);
8236 }
8237
8238 // CHECK-LABEL: @test_vmovq_n_u32(
8239 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
8240 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
8241 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
8242 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
8243 // CHECK: ret <4 x i32> [[VECINIT3_I]]
test_vmovq_n_u32(uint32_t a)8244 uint32x4_t test_vmovq_n_u32(uint32_t a) {
8245 return vmovq_n_u32(a);
8246 }
8247
8248 // CHECK-LABEL: @test_vmovq_n_s8(
8249 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8250 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8251 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8252 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8253 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8254 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8255 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8256 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8257 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8258 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8259 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8260 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8261 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8262 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8263 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8264 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8265 // CHECK: ret <16 x i8> [[VECINIT15_I]]
test_vmovq_n_s8(int8_t a)8266 int8x16_t test_vmovq_n_s8(int8_t a) {
8267 return vmovq_n_s8(a);
8268 }
8269
8270 // CHECK-LABEL: @test_vmovq_n_s16(
8271 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8272 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8273 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8274 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8275 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8276 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8277 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8278 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8279 // CHECK: ret <8 x i16> [[VECINIT7_I]]
test_vmovq_n_s16(int16_t a)8280 int16x8_t test_vmovq_n_s16(int16_t a) {
8281 return vmovq_n_s16(a);
8282 }
8283
8284 // CHECK-LABEL: @test_vmovq_n_s32(
8285 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
8286 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
8287 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
8288 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
8289 // CHECK: ret <4 x i32> [[VECINIT3_I]]
test_vmovq_n_s32(int32_t a)8290 int32x4_t test_vmovq_n_s32(int32_t a) {
8291 return vmovq_n_s32(a);
8292 }
8293
8294 // CHECK-LABEL: @test_vmovq_n_p8(
8295 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8296 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8297 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8298 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8299 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8300 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8301 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8302 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8303 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8304 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8305 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8306 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8307 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8308 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8309 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8310 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8311 // CHECK: ret <16 x i8> [[VECINIT15_I]]
test_vmovq_n_p8(poly8_t a)8312 poly8x16_t test_vmovq_n_p8(poly8_t a) {
8313 return vmovq_n_p8(a);
8314 }
8315
8316 // CHECK-LABEL: @test_vmovq_n_p16(
8317 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8318 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8319 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8320 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8321 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8322 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8323 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8324 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8325 // CHECK: ret <8 x i16> [[VECINIT7_I]]
test_vmovq_n_p16(poly16_t a)8326 poly16x8_t test_vmovq_n_p16(poly16_t a) {
8327 return vmovq_n_p16(a);
8328 }
8329
8330 // CHECK-LABEL: @test_vmovq_n_f16(
8331 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
8332 // CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
8333 // CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
8334 // CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
8335 // CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
8336 // CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
8337 // CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
8338 // CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
8339 // CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
8340 // CHECK: ret <8 x half> [[VECINIT7]]
test_vmovq_n_f16(float16_t * a)8341 float16x8_t test_vmovq_n_f16(float16_t *a) {
8342 return vmovq_n_f16(*a);
8343 }
8344
8345 // CHECK-LABEL: @test_vmovq_n_f32(
8346 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
8347 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
8348 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
8349 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
8350 // CHECK: ret <4 x float> [[VECINIT3_I]]
test_vmovq_n_f32(float32_t a)8351 float32x4_t test_vmovq_n_f32(float32_t a) {
8352 return vmovq_n_f32(a);
8353 }
8354
8355 // CHECK-LABEL: @test_vmov_n_s64(
8356 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
8357 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
8358 // CHECK: ret <1 x i64> [[ADD_I]]
test_vmov_n_s64(int64_t a)8359 int64x1_t test_vmov_n_s64(int64_t a) {
8360 int64x1_t tmp = vmov_n_s64(a);
8361 return vadd_s64(tmp, tmp);
8362 }
8363
8364 // CHECK-LABEL: @test_vmov_n_u64(
8365 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
8366 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
8367 // CHECK: ret <1 x i64> [[ADD_I]]
test_vmov_n_u64(uint64_t a)8368 uint64x1_t test_vmov_n_u64(uint64_t a) {
8369 uint64x1_t tmp = vmov_n_u64(a);
8370 return vadd_u64(tmp, tmp);
8371 }
8372
8373 // CHECK-LABEL: @test_vmovq_n_s64(
8374 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
8375 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
8376 // CHECK: ret <2 x i64> [[VECINIT1_I]]
test_vmovq_n_s64(int64_t a)8377 int64x2_t test_vmovq_n_s64(int64_t a) {
8378 return vmovq_n_s64(a);
8379 }
8380
8381 // CHECK-LABEL: @test_vmovq_n_u64(
8382 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
8383 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
8384 // CHECK: ret <2 x i64> [[VECINIT1_I]]
test_vmovq_n_u64(uint64_t a)8385 uint64x2_t test_vmovq_n_u64(uint64_t a) {
8386 return vmovq_n_u64(a);
8387 }
8388
8389 // CHECK-LABEL: @test_vmul_s8(
8390 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
8391 // CHECK: ret <8 x i8> [[MUL_I]]
test_vmul_s8(int8x8_t a,int8x8_t b)8392 int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
8393 return vmul_s8(a, b);
8394 }
8395
8396 // CHECK-LABEL: @test_vmul_s16(
8397 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
8398 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_s16(int16x4_t a,int16x4_t b)8399 int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
8400 return vmul_s16(a, b);
8401 }
8402
8403 // CHECK-LABEL: @test_vmul_s32(
8404 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
8405 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_s32(int32x2_t a,int32x2_t b)8406 int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
8407 return vmul_s32(a, b);
8408 }
8409
8410 // CHECK-LABEL: @test_vmul_f32(
8411 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b
8412 // CHECK: ret <2 x float> [[MUL_I]]
test_vmul_f32(float32x2_t a,float32x2_t b)8413 float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
8414 return vmul_f32(a, b);
8415 }
8416
8417 // CHECK-LABEL: @test_vmul_u8(
8418 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
8419 // CHECK: ret <8 x i8> [[MUL_I]]
test_vmul_u8(uint8x8_t a,uint8x8_t b)8420 uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
8421 return vmul_u8(a, b);
8422 }
8423
8424 // CHECK-LABEL: @test_vmul_u16(
8425 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
8426 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_u16(uint16x4_t a,uint16x4_t b)8427 uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
8428 return vmul_u16(a, b);
8429 }
8430
8431 // CHECK-LABEL: @test_vmul_u32(
8432 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
8433 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_u32(uint32x2_t a,uint32x2_t b)8434 uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
8435 return vmul_u32(a, b);
8436 }
8437
8438 // CHECK-LABEL: @test_vmulq_s8(
8439 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
8440 // CHECK: ret <16 x i8> [[MUL_I]]
test_vmulq_s8(int8x16_t a,int8x16_t b)8441 int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
8442 return vmulq_s8(a, b);
8443 }
8444
8445 // CHECK-LABEL: @test_vmulq_s16(
8446 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
8447 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_s16(int16x8_t a,int16x8_t b)8448 int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
8449 return vmulq_s16(a, b);
8450 }
8451
8452 // CHECK-LABEL: @test_vmulq_s32(
8453 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
8454 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_s32(int32x4_t a,int32x4_t b)8455 int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
8456 return vmulq_s32(a, b);
8457 }
8458
8459 // CHECK-LABEL: @test_vmulq_f32(
8460 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b
8461 // CHECK: ret <4 x float> [[MUL_I]]
test_vmulq_f32(float32x4_t a,float32x4_t b)8462 float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
8463 return vmulq_f32(a, b);
8464 }
8465
8466 // CHECK-LABEL: @test_vmulq_u8(
8467 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
8468 // CHECK: ret <16 x i8> [[MUL_I]]
test_vmulq_u8(uint8x16_t a,uint8x16_t b)8469 uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
8470 return vmulq_u8(a, b);
8471 }
8472
8473 // CHECK-LABEL: @test_vmulq_u16(
8474 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
8475 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_u16(uint16x8_t a,uint16x8_t b)8476 uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
8477 return vmulq_u16(a, b);
8478 }
8479
8480 // CHECK-LABEL: @test_vmulq_u32(
8481 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
8482 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_u32(uint32x4_t a,uint32x4_t b)8483 uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
8484 return vmulq_u32(a, b);
8485 }
8486
8487 // CHECK-LABEL: @test_vmull_s8(
8488 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
8489 // CHECK: ret <8 x i16> [[VMULL_I]]
test_vmull_s8(int8x8_t a,int8x8_t b)8490 int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
8491 return vmull_s8(a, b);
8492 }
8493
8494 // CHECK-LABEL: @test_vmull_s16(
8495 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8496 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8497 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
8498 // CHECK: ret <4 x i32> [[VMULL2_I]]
test_vmull_s16(int16x4_t a,int16x4_t b)8499 int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
8500 return vmull_s16(a, b);
8501 }
8502
8503 // CHECK-LABEL: @test_vmull_s32(
8504 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8505 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8506 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
8507 // CHECK: ret <2 x i64> [[VMULL2_I]]
test_vmull_s32(int32x2_t a,int32x2_t b)8508 int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
8509 return vmull_s32(a, b);
8510 }
8511
8512 // CHECK-LABEL: @test_vmull_u8(
8513 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
8514 // CHECK: ret <8 x i16> [[VMULL_I]]
test_vmull_u8(uint8x8_t a,uint8x8_t b)8515 uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
8516 return vmull_u8(a, b);
8517 }
8518
8519 // CHECK-LABEL: @test_vmull_u16(
8520 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8521 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8522 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
8523 // CHECK: ret <4 x i32> [[VMULL2_I]]
test_vmull_u16(uint16x4_t a,uint16x4_t b)8524 uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
8525 return vmull_u16(a, b);
8526 }
8527
8528 // CHECK-LABEL: @test_vmull_u32(
8529 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8530 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8531 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
8532 // CHECK: ret <2 x i64> [[VMULL2_I]]
test_vmull_u32(uint32x2_t a,uint32x2_t b)8533 uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
8534 return vmull_u32(a, b);
8535 }
8536
8537 // CHECK-LABEL: @test_vmull_p8(
8538 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
8539 // CHECK: ret <8 x i16> [[VMULL_I]]
test_vmull_p8(poly8x8_t a,poly8x8_t b)8540 poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
8541 return vmull_p8(a, b);
8542 }
8543
8544 // CHECK-LABEL: @test_vmull_lane_s16(
8545 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8546 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8547 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8548 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
8549 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
8550 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
8551 // CHECK: ret <4 x i32> [[VMULL2_I]]
test_vmull_lane_s16(int16x4_t a,int16x4_t b)8552 int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
8553 return vmull_lane_s16(a, b, 3);
8554 }
8555
8556 // CHECK-LABEL: @test_vmull_lane_s32(
8557 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8558 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8559 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8560 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
8561 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
8562 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
8563 // CHECK: ret <2 x i64> [[VMULL2_I]]
test_vmull_lane_s32(int32x2_t a,int32x2_t b)8564 int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
8565 return vmull_lane_s32(a, b, 1);
8566 }
8567
8568 // CHECK-LABEL: @test_vmull_lane_u16(
8569 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8570 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8571 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8572 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
8573 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
8574 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
8575 // CHECK: ret <4 x i32> [[VMULL2_I]]
test_vmull_lane_u16(uint16x4_t a,uint16x4_t b)8576 uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
8577 return vmull_lane_u16(a, b, 3);
8578 }
8579
8580 // CHECK-LABEL: @test_vmull_lane_u32(
8581 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8582 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8583 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8584 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
8585 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
8586 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
8587 // CHECK: ret <2 x i64> [[VMULL2_I]]
test_vmull_lane_u32(uint32x2_t a,uint32x2_t b)8588 uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
8589 return vmull_lane_u32(a, b, 1);
8590 }
8591
8592 // CHECK-LABEL: @test_vmull_n_s16(
8593 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8594 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8595 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8596 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8597 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8598 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8599 // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8600 // CHECK: ret <4 x i32> [[VMULL5_I]]
test_vmull_n_s16(int16x4_t a,int16_t b)8601 int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
8602 return vmull_n_s16(a, b);
8603 }
8604
8605 // CHECK-LABEL: @test_vmull_n_s32(
8606 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8607 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8608 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8609 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8610 // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8611 // CHECK: ret <2 x i64> [[VMULL3_I]]
test_vmull_n_s32(int32x2_t a,int32_t b)8612 int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
8613 return vmull_n_s32(a, b);
8614 }
8615
8616 // CHECK-LABEL: @test_vmull_n_u16(
8617 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8618 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8619 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8620 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8621 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8622 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8623 // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8624 // CHECK: ret <4 x i32> [[VMULL5_I]]
// vmull_n_u16: scalar %b splat, then the unsigned @llvm.arm.neon.vmullu.v4i32 (per CHECKs above).
test_vmull_n_u16(uint16x4_t a,uint16_t b)8625 uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
8626 return vmull_n_u16(a, b);
8627 }
8628
8629 // CHECK-LABEL: @test_vmull_n_u32(
8630 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8631 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8632 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8633 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8634 // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8635 // CHECK: ret <2 x i64> [[VMULL3_I]]
// vmull_n_u32: scalar %b splat, then @llvm.arm.neon.vmullu.v2i64 (per CHECKs above).
test_vmull_n_u32(uint32x2_t a,uint32_t b)8636 uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
8637 return vmull_n_u32(a, b);
8638 }
8639
8640 // CHECK-LABEL: @test_vmul_p8(
8641 // CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b)
8642 // CHECK: ret <8 x i8> [[VMUL_V_I]]
// vmul_p8 (polynomial multiply) lowers to @llvm.arm.neon.vmulp.v8i8 (per CHECKs above).
test_vmul_p8(poly8x8_t a,poly8x8_t b)8643 poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
8644 return vmul_p8(a, b);
8645 }
8646
8647 // CHECK-LABEL: @test_vmulq_p8(
8648 // CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b)
8649 // CHECK: ret <16 x i8> [[VMULQ_V_I]]
// vmulq_p8 (quad polynomial multiply) lowers to @llvm.arm.neon.vmulp.v16i8 (per CHECKs above).
test_vmulq_p8(poly8x16_t a,poly8x16_t b)8650 poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
8651 return vmulq_p8(a, b);
8652 }
8653
8654 // CHECK-LABEL: @test_vmul_lane_s16(
8655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8656 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8657 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8658 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
8659 // CHECK: ret <4 x i16> [[MUL]]
// vmul_lane_s16, lane 3: shufflevector splat of the lane, then a plain mul (per CHECKs above).
test_vmul_lane_s16(int16x4_t a,int16x4_t b)8660 int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
8661 return vmul_lane_s16(a, b, 3);
8662 }
8663
8664 // CHECK-LABEL: @test_vmul_lane_s32(
8665 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8666 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8667 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8668 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
8669 // CHECK: ret <2 x i32> [[MUL]]
// vmul_lane_s32, lane 1: shufflevector splat then mul (per CHECKs above).
test_vmul_lane_s32(int32x2_t a,int32x2_t b)8670 int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
8671 return vmul_lane_s32(a, b, 1);
8672 }
8673
8674 // CHECK-LABEL: @test_vmul_lane_f32(
8675 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
8676 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
8677 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
8678 // CHECK: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
8679 // CHECK: ret <2 x float> [[MUL]]
// vmul_lane_f32, lane 1: shufflevector splat then fmul (per CHECKs above).
test_vmul_lane_f32(float32x2_t a,float32x2_t b)8680 float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
8681 return vmul_lane_f32(a, b, 1);
8682 }
8683
8684 // CHECK-LABEL: @test_vmul_lane_u16(
8685 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8686 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8687 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8688 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
8689 // CHECK: ret <4 x i16> [[MUL]]
// vmul_lane_u16, lane 3: shufflevector splat then mul (per CHECKs above).
test_vmul_lane_u16(uint16x4_t a,uint16x4_t b)8690 uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
8691 return vmul_lane_u16(a, b, 3);
8692 }
8693
8694 // CHECK-LABEL: @test_vmul_lane_u32(
8695 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8696 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8697 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8698 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
8699 // CHECK: ret <2 x i32> [[MUL]]
// vmul_lane_u32, lane 1: shufflevector splat then mul (per CHECKs above).
test_vmul_lane_u32(uint32x2_t a,uint32x2_t b)8700 uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
8701 return vmul_lane_u32(a, b, 1);
8702 }
8703
8704 // CHECK-LABEL: @test_vmulq_lane_s16(
8705 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8706 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8707 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
8708 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
8709 // CHECK: ret <8 x i16> [[MUL]]
// vmulq_lane_s16, lane 3: 64-bit %b lane splat widened to <8 x i16>, then mul (per CHECKs above).
test_vmulq_lane_s16(int16x8_t a,int16x4_t b)8710 int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
8711 return vmulq_lane_s16(a, b, 3);
8712 }
8713
8714 // CHECK-LABEL: @test_vmulq_lane_s32(
8715 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8716 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8717 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8718 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
8719 // CHECK: ret <4 x i32> [[MUL]]
// vmulq_lane_s32, lane 1: lane splat to <4 x i32>, then mul (per CHECKs above).
test_vmulq_lane_s32(int32x4_t a,int32x2_t b)8720 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
8721 return vmulq_lane_s32(a, b, 1);
8722 }
8723
8724 // CHECK-LABEL: @test_vmulq_lane_f32(
8725 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
8726 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
8727 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8728 // CHECK: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
8729 // CHECK: ret <4 x float> [[MUL]]
// vmulq_lane_f32, lane 1: lane splat to <4 x float>, then fmul (per CHECKs above).
test_vmulq_lane_f32(float32x4_t a,float32x2_t b)8730 float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
8731 return vmulq_lane_f32(a, b, 1);
8732 }
8733
8734 // CHECK-LABEL: @test_vmulq_lane_u16(
8735 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8736 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8737 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
8738 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
8739 // CHECK: ret <8 x i16> [[MUL]]
// vmulq_lane_u16, lane 3: lane splat to <8 x i16>, then mul (per CHECKs above).
test_vmulq_lane_u16(uint16x8_t a,uint16x4_t b)8740 uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
8741 return vmulq_lane_u16(a, b, 3);
8742 }
8743
8744 // CHECK-LABEL: @test_vmulq_lane_u32(
8745 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8746 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8747 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8748 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
8749 // CHECK: ret <4 x i32> [[MUL]]
// vmulq_lane_u32, lane 1: lane splat to <4 x i32>, then mul (per CHECKs above).
test_vmulq_lane_u32(uint32x4_t a,uint32x2_t b)8750 uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
8751 return vmulq_lane_u32(a, b, 1);
8752 }
8753
8754 // CHECK-LABEL: @test_vmul_n_s16(
8755 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8756 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8757 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8758 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8759 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
8760 // CHECK: ret <4 x i16> [[MUL_I]]
// vmul_n_s16: scalar %b splat via insertelement chain, then mul (per CHECKs above).
test_vmul_n_s16(int16x4_t a,int16_t b)8761 int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
8762 return vmul_n_s16(a, b);
8763 }
8764
8765 // CHECK-LABEL: @test_vmul_n_s32(
8766 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8767 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8768 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
8769 // CHECK: ret <2 x i32> [[MUL_I]]
// vmul_n_s32: scalar splat then mul (per CHECKs above).
test_vmul_n_s32(int32x2_t a,int32_t b)8770 int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
8771 return vmul_n_s32(a, b);
8772 }
8773
8774 // CHECK-LABEL: @test_vmul_n_f32(
8775 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
8776 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
8777 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
8778 // CHECK: ret <2 x float> [[MUL_I]]
// vmul_n_f32: scalar splat then fmul (per CHECKs above).
test_vmul_n_f32(float32x2_t a,float32_t b)8779 float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
8780 return vmul_n_f32(a, b);
8781 }
8782
8783 // CHECK-LABEL: @test_vmul_n_u16(
8784 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8785 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8786 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8787 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8788 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
8789 // CHECK: ret <4 x i16> [[MUL_I]]
// vmul_n_u16: scalar splat then mul (per CHECKs above).
test_vmul_n_u16(uint16x4_t a,uint16_t b)8790 uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
8791 return vmul_n_u16(a, b);
8792 }
8793
8794 // CHECK-LABEL: @test_vmul_n_u32(
8795 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8796 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8797 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
8798 // CHECK: ret <2 x i32> [[MUL_I]]
// vmul_n_u32: scalar splat then mul (per CHECKs above).
test_vmul_n_u32(uint32x2_t a,uint32_t b)8799 uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
8800 return vmul_n_u32(a, b);
8801 }
8802
8803 // CHECK-LABEL: @test_vmulq_n_s16(
8804 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
8805 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
8806 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
8807 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
8808 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
8809 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
8810 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
8811 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
8812 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
8813 // CHECK: ret <8 x i16> [[MUL_I]]
// vmulq_n_s16: 8-element scalar splat then mul (per CHECKs above).
test_vmulq_n_s16(int16x8_t a,int16_t b)8814 int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
8815 return vmulq_n_s16(a, b);
8816 }
8817
8818 // CHECK-LABEL: @test_vmulq_n_s32(
8819 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
8820 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
8821 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
8822 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
8823 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
8824 // CHECK: ret <4 x i32> [[MUL_I]]
// vmulq_n_s32: 4-element scalar splat then mul (per CHECKs above).
test_vmulq_n_s32(int32x4_t a,int32_t b)8825 int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
8826 return vmulq_n_s32(a, b);
8827 }
8828
8829 // CHECK-LABEL: @test_vmulq_n_f32(
8830 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
8831 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
8832 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
8833 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
8834 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
8835 // CHECK: ret <4 x float> [[MUL_I]]
// vmulq_n_f32: 4-element scalar splat then fmul (per CHECKs above).
test_vmulq_n_f32(float32x4_t a,float32_t b)8836 float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
8837 return vmulq_n_f32(a, b);
8838 }
8839
8840 // CHECK-LABEL: @test_vmulq_n_u16(
8841 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
8842 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
8843 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
8844 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
8845 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
8846 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
8847 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
8848 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
8849 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
8850 // CHECK: ret <8 x i16> [[MUL_I]]
// vmulq_n_u16: 8-element scalar splat then mul (per CHECKs above).
test_vmulq_n_u16(uint16x8_t a,uint16_t b)8851 uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
8852 return vmulq_n_u16(a, b);
8853 }
8854
8855 // CHECK-LABEL: @test_vmulq_n_u32(
8856 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
8857 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
8858 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
8859 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
8860 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
8861 // CHECK: ret <4 x i32> [[MUL_I]]
// vmulq_n_u32: 4-element scalar splat then mul (per CHECKs above).
test_vmulq_n_u32(uint32x4_t a,uint32_t b)8862 uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
8863 return vmulq_n_u32(a, b);
8864 }
8865
8866 // CHECK-LABEL: @test_vmvn_s8(
8867 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8868 // CHECK: ret <8 x i8> [[NEG_I]]
// vmvn_s8 (bitwise NOT) lowers to xor with all-ones (per CHECKs above).
test_vmvn_s8(int8x8_t a)8869 int8x8_t test_vmvn_s8(int8x8_t a) {
8870 return vmvn_s8(a);
8871 }
8872
8873 // CHECK-LABEL: @test_vmvn_s16(
8874 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
8875 // CHECK: ret <4 x i16> [[NEG_I]]
// vmvn_s16 lowers to xor with all-ones (per CHECKs above).
test_vmvn_s16(int16x4_t a)8876 int16x4_t test_vmvn_s16(int16x4_t a) {
8877 return vmvn_s16(a);
8878 }
8879
8880 // CHECK-LABEL: @test_vmvn_s32(
8881 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
8882 // CHECK: ret <2 x i32> [[NEG_I]]
// vmvn_s32 lowers to xor with all-ones (per CHECKs above).
test_vmvn_s32(int32x2_t a)8883 int32x2_t test_vmvn_s32(int32x2_t a) {
8884 return vmvn_s32(a);
8885 }
8886
8887 // CHECK-LABEL: @test_vmvn_u8(
8888 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8889 // CHECK: ret <8 x i8> [[NEG_I]]
// vmvn_u8 lowers to xor with all-ones (per CHECKs above).
test_vmvn_u8(uint8x8_t a)8890 uint8x8_t test_vmvn_u8(uint8x8_t a) {
8891 return vmvn_u8(a);
8892 }
8893
8894 // CHECK-LABEL: @test_vmvn_u16(
8895 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
8896 // CHECK: ret <4 x i16> [[NEG_I]]
// vmvn_u16 lowers to xor with all-ones (per CHECKs above).
test_vmvn_u16(uint16x4_t a)8897 uint16x4_t test_vmvn_u16(uint16x4_t a) {
8898 return vmvn_u16(a);
8899 }
8900
8901 // CHECK-LABEL: @test_vmvn_u32(
8902 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
8903 // CHECK: ret <2 x i32> [[NEG_I]]
// vmvn_u32 lowers to xor with all-ones (per CHECKs above).
test_vmvn_u32(uint32x2_t a)8904 uint32x2_t test_vmvn_u32(uint32x2_t a) {
8905 return vmvn_u32(a);
8906 }
8907
8908 // CHECK-LABEL: @test_vmvn_p8(
8909 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8910 // CHECK: ret <8 x i8> [[NEG_I]]
// vmvn_p8 lowers to xor with all-ones (per CHECKs above).
test_vmvn_p8(poly8x8_t a)8911 poly8x8_t test_vmvn_p8(poly8x8_t a) {
8912 return vmvn_p8(a);
8913 }
8914
8915 // CHECK-LABEL: @test_vmvnq_s8(
8916 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8917 // CHECK: ret <16 x i8> [[NEG_I]]
// vmvnq_s8 (quad NOT) lowers to <16 x i8> xor with all-ones (per CHECKs above).
test_vmvnq_s8(int8x16_t a)8918 int8x16_t test_vmvnq_s8(int8x16_t a) {
8919 return vmvnq_s8(a);
8920 }
8921
8922 // CHECK-LABEL: @test_vmvnq_s16(
8923 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
8924 // CHECK: ret <8 x i16> [[NEG_I]]
// vmvnq_s16 lowers to <8 x i16> xor with all-ones (per CHECKs above).
test_vmvnq_s16(int16x8_t a)8925 int16x8_t test_vmvnq_s16(int16x8_t a) {
8926 return vmvnq_s16(a);
8927 }
8928
8929 // CHECK-LABEL: @test_vmvnq_s32(
8930 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
8931 // CHECK: ret <4 x i32> [[NEG_I]]
// vmvnq_s32 lowers to <4 x i32> xor with all-ones (per CHECKs above).
test_vmvnq_s32(int32x4_t a)8932 int32x4_t test_vmvnq_s32(int32x4_t a) {
8933 return vmvnq_s32(a);
8934 }
8935
8936 // CHECK-LABEL: @test_vmvnq_u8(
8937 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8938 // CHECK: ret <16 x i8> [[NEG_I]]
// vmvnq_u8 lowers to <16 x i8> xor with all-ones (per CHECKs above).
test_vmvnq_u8(uint8x16_t a)8939 uint8x16_t test_vmvnq_u8(uint8x16_t a) {
8940 return vmvnq_u8(a);
8941 }
8942
8943 // CHECK-LABEL: @test_vmvnq_u16(
8944 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
8945 // CHECK: ret <8 x i16> [[NEG_I]]
// vmvnq_u16 lowers to <8 x i16> xor with all-ones (per CHECKs above).
test_vmvnq_u16(uint16x8_t a)8946 uint16x8_t test_vmvnq_u16(uint16x8_t a) {
8947 return vmvnq_u16(a);
8948 }
8949
8950 // CHECK-LABEL: @test_vmvnq_u32(
8951 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
8952 // CHECK: ret <4 x i32> [[NEG_I]]
// vmvnq_u32 lowers to <4 x i32> xor with all-ones (per CHECKs above).
test_vmvnq_u32(uint32x4_t a)8953 uint32x4_t test_vmvnq_u32(uint32x4_t a) {
8954 return vmvnq_u32(a);
8955 }
8956
8957 // CHECK-LABEL: @test_vmvnq_p8(
8958 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8959 // CHECK: ret <16 x i8> [[NEG_I]]
// vmvnq_p8 lowers to <16 x i8> xor with all-ones (per CHECKs above).
test_vmvnq_p8(poly8x16_t a)8960 poly8x16_t test_vmvnq_p8(poly8x16_t a) {
8961 return vmvnq_p8(a);
8962 }
8963
8964 // CHECK-LABEL: @test_vneg_s8(
8965 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
8966 // CHECK: ret <8 x i8> [[SUB_I]]
// vneg_s8 lowers to integer sub from zeroinitializer (per CHECKs above).
test_vneg_s8(int8x8_t a)8967 int8x8_t test_vneg_s8(int8x8_t a) {
8968 return vneg_s8(a);
8969 }
8970
8971 // CHECK-LABEL: @test_vneg_s16(
8972 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
8973 // CHECK: ret <4 x i16> [[SUB_I]]
// vneg_s16 lowers to sub from zeroinitializer (per CHECKs above).
test_vneg_s16(int16x4_t a)8974 int16x4_t test_vneg_s16(int16x4_t a) {
8975 return vneg_s16(a);
8976 }
8977
8978 // CHECK-LABEL: @test_vneg_s32(
8979 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
8980 // CHECK: ret <2 x i32> [[SUB_I]]
// vneg_s32 lowers to sub from zeroinitializer (per CHECKs above).
test_vneg_s32(int32x2_t a)8981 int32x2_t test_vneg_s32(int32x2_t a) {
8982 return vneg_s32(a);
8983 }
8984
8985 // CHECK-LABEL: @test_vneg_f32(
8986 // CHECK: [[SUB_I:%.*]] = fneg <2 x float> %a
8987 // CHECK: ret <2 x float> [[SUB_I]]
// vneg_f32 lowers to the dedicated fneg instruction, not fsub (per CHECKs above).
test_vneg_f32(float32x2_t a)8988 float32x2_t test_vneg_f32(float32x2_t a) {
8989 return vneg_f32(a);
8990 }
8991
8992 // CHECK-LABEL: @test_vnegq_s8(
8993 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
8994 // CHECK: ret <16 x i8> [[SUB_I]]
// vnegq_s8 lowers to <16 x i8> sub from zeroinitializer (per CHECKs above).
test_vnegq_s8(int8x16_t a)8995 int8x16_t test_vnegq_s8(int8x16_t a) {
8996 return vnegq_s8(a);
8997 }
8998
8999 // CHECK-LABEL: @test_vnegq_s16(
9000 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
9001 // CHECK: ret <8 x i16> [[SUB_I]]
// vnegq_s16 lowers to <8 x i16> sub from zeroinitializer (per CHECKs above).
test_vnegq_s16(int16x8_t a)9002 int16x8_t test_vnegq_s16(int16x8_t a) {
9003 return vnegq_s16(a);
9004 }
9005
9006 // CHECK-LABEL: @test_vnegq_s32(
9007 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
9008 // CHECK: ret <4 x i32> [[SUB_I]]
// vnegq_s32 lowers to <4 x i32> sub from zeroinitializer (per CHECKs above).
test_vnegq_s32(int32x4_t a)9009 int32x4_t test_vnegq_s32(int32x4_t a) {
9010 return vnegq_s32(a);
9011 }
9012
9013 // CHECK-LABEL: @test_vnegq_f32(
9014 // CHECK: [[SUB_I:%.*]] = fneg <4 x float> %a
9015 // CHECK: ret <4 x float> [[SUB_I]]
// vnegq_f32 lowers to <4 x float> fneg (per CHECKs above).
test_vnegq_f32(float32x4_t a)9016 float32x4_t test_vnegq_f32(float32x4_t a) {
9017 return vnegq_f32(a);
9018 }
9019
9020 // CHECK-LABEL: @test_vorn_s8(
9021 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9022 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
9023 // CHECK: ret <8 x i8> [[OR_I]]
// vorn_s8 (OR-NOT) lowers to or of %a with (%b xor all-ones) (per CHECKs above).
test_vorn_s8(int8x8_t a,int8x8_t b)9024 int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
9025 return vorn_s8(a, b);
9026 }
9027
9028 // CHECK-LABEL: @test_vorn_s16(
9029 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
9030 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
9031 // CHECK: ret <4 x i16> [[OR_I]]
// vorn_s16 lowers to or of %a with (%b xor all-ones) (per CHECKs above).
test_vorn_s16(int16x4_t a,int16x4_t b)9032 int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
9033 return vorn_s16(a, b);
9034 }
9035
9036 // CHECK-LABEL: @test_vorn_s32(
9037 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
9038 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
9039 // CHECK: ret <2 x i32> [[OR_I]]
// vorn_s32 lowers to or of %a with (%b xor all-ones) (per CHECKs above).
test_vorn_s32(int32x2_t a,int32x2_t b)9040 int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
9041 return vorn_s32(a, b);
9042 }
9043
9044 // CHECK-LABEL: @test_vorn_s64(
9045 // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
9046 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
9047 // CHECK: ret <1 x i64> [[OR_I]]
// vorn_s64 lowers to or of %a with (%b xor all-ones) on <1 x i64> (per CHECKs above).
test_vorn_s64(int64x1_t a,int64x1_t b)9048 int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
9049 return vorn_s64(a, b);
9050 }
9051
9052 // CHECK-LABEL: @test_vorn_u8(
9053 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9054 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
9055 // CHECK: ret <8 x i8> [[OR_I]]
// vorn_u8 lowers to or of %a with (%b xor all-ones) (per CHECKs above).
test_vorn_u8(uint8x8_t a,uint8x8_t b)9056 uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
9057 return vorn_u8(a, b);
9058 }
9059
9060 // CHECK-LABEL: @test_vorn_u16(
9061 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
9062 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
9063 // CHECK: ret <4 x i16> [[OR_I]]
// vorn_u16 lowers to or of %a with (%b xor all-ones) (per CHECKs above).
test_vorn_u16(uint16x4_t a,uint16x4_t b)9064 uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
9065 return vorn_u16(a, b);
9066 }
9067
9068 // CHECK-LABEL: @test_vorn_u32(
9069 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
9070 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
9071 // CHECK: ret <2 x i32> [[OR_I]]
// vorn_u32 lowers to or of %a with (%b xor all-ones) (per CHECKs above).
test_vorn_u32(uint32x2_t a,uint32x2_t b)9072 uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
9073 return vorn_u32(a, b);
9074 }
9075
9076 // CHECK-LABEL: @test_vorn_u64(
9077 // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
9078 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
9079 // CHECK: ret <1 x i64> [[OR_I]]
// vorn_u64 lowers to or of %a with (%b xor all-ones) on <1 x i64> (per CHECKs above).
test_vorn_u64(uint64x1_t a,uint64x1_t b)9080 uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
9081 return vorn_u64(a, b);
9082 }
9083
9084 // CHECK-LABEL: @test_vornq_s8(
9085 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9086 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
9087 // CHECK: ret <16 x i8> [[OR_I]]
// vornq_s8 (quad OR-NOT) lowers to or of %a with (%b xor all-ones) on <16 x i8> (per CHECKs above).
test_vornq_s8(int8x16_t a,int8x16_t b)9088 int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
9089 return vornq_s8(a, b);
9090 }
9091
9092 // CHECK-LABEL: @test_vornq_s16(
9093 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
9094 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
9095 // CHECK: ret <8 x i16> [[OR_I]]
// vornq_s16 lowers to or of %a with (%b xor all-ones) on <8 x i16> (per CHECKs above).
test_vornq_s16(int16x8_t a,int16x8_t b)9096 int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
9097 return vornq_s16(a, b);
9098 }
9099
9100 // CHECK-LABEL: @test_vornq_s32(
9101 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
9102 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
9103 // CHECK: ret <4 x i32> [[OR_I]]
// vornq_s32 lowers to or of %a with (%b xor all-ones) on <4 x i32> (per CHECKs above).
test_vornq_s32(int32x4_t a,int32x4_t b)9104 int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
9105 return vornq_s32(a, b);
9106 }
9107
9108 // CHECK-LABEL: @test_vornq_s64(
9109 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
9110 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
9111 // CHECK: ret <2 x i64> [[OR_I]]
// vornq_s64 lowers to or of %a with (%b xor all-ones) on <2 x i64> (per CHECKs above).
test_vornq_s64(int64x2_t a,int64x2_t b)9112 int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
9113 return vornq_s64(a, b);
9114 }
9115
9116 // CHECK-LABEL: @test_vornq_u8(
9117 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9118 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
9119 // CHECK: ret <16 x i8> [[OR_I]]
// vornq_u8 lowers to or of %a with (%b xor all-ones) on <16 x i8> (per CHECKs above).
test_vornq_u8(uint8x16_t a,uint8x16_t b)9120 uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
9121 return vornq_u8(a, b);
9122 }
9123
9124 // CHECK-LABEL: @test_vornq_u16(
9125 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
9126 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
9127 // CHECK: ret <8 x i16> [[OR_I]]
// vornq_u16 lowers to or of %a with (%b xor all-ones) on <8 x i16> (per CHECKs above).
test_vornq_u16(uint16x8_t a,uint16x8_t b)9128 uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
9129 return vornq_u16(a, b);
9130 }
9131
9132 // CHECK-LABEL: @test_vornq_u32(
9133 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
9134 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
9135 // CHECK: ret <4 x i32> [[OR_I]]
// vornq_u32 lowers to or of %a with (%b xor all-ones) on <4 x i32> (per CHECKs above).
test_vornq_u32(uint32x4_t a,uint32x4_t b)9136 uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
9137 return vornq_u32(a, b);
9138 }
9139
9140 // CHECK-LABEL: @test_vornq_u64(
9141 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
9142 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
9143 // CHECK: ret <2 x i64> [[OR_I]]
// vornq_u64 lowers to or of %a with (%b xor all-ones) on <2 x i64> (per CHECKs above).
test_vornq_u64(uint64x2_t a,uint64x2_t b)9144 uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
9145 return vornq_u64(a, b);
9146 }
9147
9148 // CHECK-LABEL: @test_vorr_s8(
9149 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
9150 // CHECK: ret <8 x i8> [[OR_I]]
// vorr_s8 lowers to a plain or instruction (per CHECKs above).
test_vorr_s8(int8x8_t a,int8x8_t b)9151 int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
9152 return vorr_s8(a, b);
9153 }
9154
9155 // CHECK-LABEL: @test_vorr_s16(
9156 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
9157 // CHECK: ret <4 x i16> [[OR_I]]
// vorr_s16 lowers to a plain or (per CHECKs above).
test_vorr_s16(int16x4_t a,int16x4_t b)9158 int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
9159 return vorr_s16(a, b);
9160 }
9161
9162 // CHECK-LABEL: @test_vorr_s32(
9163 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
9164 // CHECK: ret <2 x i32> [[OR_I]]
// vorr_s32 lowers to a plain or (per CHECKs above).
test_vorr_s32(int32x2_t a,int32x2_t b)9165 int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
9166 return vorr_s32(a, b);
9167 }
9168
9169 // CHECK-LABEL: @test_vorr_s64(
9170 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
9171 // CHECK: ret <1 x i64> [[OR_I]]
// vorr_s64 lowers to a plain or on <1 x i64> (per CHECKs above).
test_vorr_s64(int64x1_t a,int64x1_t b)9172 int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
9173 return vorr_s64(a, b);
9174 }
9175
9176 // CHECK-LABEL: @test_vorr_u8(
9177 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
9178 // CHECK: ret <8 x i8> [[OR_I]]
// vorr_u8 lowers to a plain or (per CHECKs above).
test_vorr_u8(uint8x8_t a,uint8x8_t b)9179 uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
9180 return vorr_u8(a, b);
9181 }
9182
9183 // CHECK-LABEL: @test_vorr_u16(
9184 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
9185 // CHECK: ret <4 x i16> [[OR_I]]
// vorr_u16 lowers to a plain or (per CHECKs above).
test_vorr_u16(uint16x4_t a,uint16x4_t b)9186 uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
9187 return vorr_u16(a, b);
9188 }
9189
9190 // CHECK-LABEL: @test_vorr_u32(
9191 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
9192 // CHECK: ret <2 x i32> [[OR_I]]
// vorr_u32 lowers to a plain or (per CHECKs above).
test_vorr_u32(uint32x2_t a,uint32x2_t b)9193 uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
9194 return vorr_u32(a, b);
9195 }
9196
9197 // CHECK-LABEL: @test_vorr_u64(
9198 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
9199 // CHECK: ret <1 x i64> [[OR_I]]
// vorr_u64 lowers to a plain or on <1 x i64> (per CHECKs above).
test_vorr_u64(uint64x1_t a,uint64x1_t b)9200 uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
9201 return vorr_u64(a, b);
9202 }
9203
9204 // CHECK-LABEL: @test_vorrq_s8(
9205 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
9206 // CHECK: ret <16 x i8> [[OR_I]]
// vorrq_s8 lowers to a plain or on <16 x i8> (per CHECKs above).
test_vorrq_s8(int8x16_t a,int8x16_t b)9207 int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
9208 return vorrq_s8(a, b);
9209 }
9210
9211 // CHECK-LABEL: @test_vorrq_s16(
9212 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
9213 // CHECK: ret <8 x i16> [[OR_I]]
// vorrq_s16 lowers to a plain or on <8 x i16> (per CHECKs above).
test_vorrq_s16(int16x8_t a,int16x8_t b)9214 int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
9215 return vorrq_s16(a, b);
9216 }
9217
9218 // CHECK-LABEL: @test_vorrq_s32(
9219 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
9220 // CHECK: ret <4 x i32> [[OR_I]]
// vorrq_s32 lowers to a plain or on <4 x i32> (per CHECKs above).
test_vorrq_s32(int32x4_t a,int32x4_t b)9221 int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
9222 return vorrq_s32(a, b);
9223 }
9224
9225 // CHECK-LABEL: @test_vorrq_s64(
9226 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
9227 // CHECK: ret <2 x i64> [[OR_I]]
// vorrq_s64 lowers to a plain or on <2 x i64> (per CHECKs above).
test_vorrq_s64(int64x2_t a,int64x2_t b)9228 int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
9229 return vorrq_s64(a, b);
9230 }
9231
9232 // CHECK-LABEL: @test_vorrq_u8(
9233 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
9234 // CHECK: ret <16 x i8> [[OR_I]]
// vorrq_u8 lowers to a plain or on <16 x i8> (per CHECKs above).
test_vorrq_u8(uint8x16_t a,uint8x16_t b)9235 uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
9236 return vorrq_u8(a, b);
9237 }
9238
9239 // CHECK-LABEL: @test_vorrq_u16(
9240 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
9241 // CHECK: ret <8 x i16> [[OR_I]]
// vorrq_u16 lowers to a plain or on <8 x i16> (per CHECKs above).
test_vorrq_u16(uint16x8_t a,uint16x8_t b)9242 uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
9243 return vorrq_u16(a, b);
9244 }
9245
9246 // CHECK-LABEL: @test_vorrq_u32(
9247 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
9248 // CHECK: ret <4 x i32> [[OR_I]]
// vorrq_u32 lowers to a plain or on <4 x i32> (per CHECKs above).
test_vorrq_u32(uint32x4_t a,uint32x4_t b)9249 uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
9250 return vorrq_u32(a, b);
9251 }
9252
9253 // CHECK-LABEL: @test_vorrq_u64(
9254 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
9255 // CHECK: ret <2 x i64> [[OR_I]]
// vorrq_u64 lowers to a plain or on <2 x i64> (per CHECKs above).
test_vorrq_u64(uint64x2_t a,uint64x2_t b)9256 uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
9257 return vorrq_u64(a, b);
9258 }
9259
9260 // CHECK-LABEL: @test_vpadal_s8(
9261 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
9262 // CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
9263 // CHECK: ret <4 x i16> [[VPADAL_V1_I]]
// vpadal_s8 (pairwise add and accumulate) lowers to @llvm.arm.neon.vpadals.v4i16.v8i8 (per CHECKs above).
test_vpadal_s8(int16x4_t a,int8x8_t b)9264 int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
9265 return vpadal_s8(a, b);
9266 }
9267
9268 // CHECK-LABEL: @test_vpadal_s16(
9269 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
9270 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9271 // CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
9272 // CHECK: ret <2 x i32> [[VPADAL_V2_I]]
// vpadal_s16 lowers to @llvm.arm.neon.vpadals.v2i32.v4i16 (per CHECKs above).
test_vpadal_s16(int32x2_t a,int16x4_t b)9273 int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
9274 return vpadal_s16(a, b);
9275 }
9276
9277 // CHECK-LABEL: @test_vpadal_s32(
9278 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
9279 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9280 // CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
9281 // CHECK: ret <1 x i64> [[VPADAL_V2_I]]
// vpadal_s32 lowers to @llvm.arm.neon.vpadals.v1i64.v2i32 (per CHECKs above).
test_vpadal_s32(int64x1_t a,int32x2_t b)9282 int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
9283 return vpadal_s32(a, b);
9284 }
9285
9286 // CHECK-LABEL: @test_vpadal_u8(
9287 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
9288 // CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
9289 // CHECK: ret <4 x i16> [[VPADAL_V1_I]]
// vpadal_u8 lowers to the unsigned @llvm.arm.neon.vpadalu.v4i16.v8i8 (per CHECKs above).
test_vpadal_u8(uint16x4_t a,uint8x8_t b)9290 uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
9291 return vpadal_u8(a, b);
9292 }
9293
9294 // CHECK-LABEL: @test_vpadal_u16(
9295 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
9296 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9297 // CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
9298 // CHECK: ret <2 x i32> [[VPADAL_V2_I]]
// vpadal_u16 lowers to @llvm.arm.neon.vpadalu.v2i32.v4i16 (per CHECKs above).
test_vpadal_u16(uint32x2_t a,uint16x4_t b)9299 uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
9300 return vpadal_u16(a, b);
9301 }
9302
9303 // CHECK-LABEL: @test_vpadal_u32(
9304 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
9305 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9306 // CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
9307 // CHECK: ret <1 x i64> [[VPADAL_V2_I]]
test_vpadal_u32(uint64x1_t a,uint32x2_t b)9308 uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
9309 return vpadal_u32(a, b);
9310 }
9311
// vpadalq_*: pairwise add and accumulate long, 128-bit (q-register) vectors.
// Same lowering as the d-register forms above, with the wider vector types.
// Expected-IR lines are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vpadalq_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: @test_vpadalq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: @test_vpadalq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: @test_vpadalq_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: @test_vpadalq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: @test_vpadalq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}
9363
// vpadd_*: pairwise add of two 64-bit vectors.  Signed, unsigned, and float
// variants all lower to the one element-type-agnostic @llvm.arm.neon.vpadd
// intrinsic.  Expected-IR lines are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vpadd_s8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}
9427
// vpaddl_*: pairwise add long (no accumulator), 64-bit vectors.  Lowers to
// @llvm.arm.neon.vpaddls (signed) / vpaddlu (unsigned).  Expected-IR lines
// are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vpaddl_s8(
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a)
// CHECK: ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: @test_vpaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a)
// CHECK: ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: @test_vpaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a)
// CHECK: ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: @test_vpaddl_u8(
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a)
// CHECK: ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: @test_vpaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a)
// CHECK: ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: @test_vpaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a)
// CHECK: ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}
9473
// vpaddlq_*: pairwise add long, 128-bit vectors.  Same vpaddls/vpaddlu
// lowering as the d-register forms, with the wider vector types.
// Expected-IR lines are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vpaddlq_s8(
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a)
// CHECK: ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: @test_vpaddlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a)
// CHECK: ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: @test_vpaddlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a)
// CHECK: ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: @test_vpaddlq_u8(
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a)
// CHECK: ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: @test_vpaddlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a)
// CHECK: ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: @test_vpaddlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a)
// CHECK: ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}
9519
// vpmax_*: pairwise maximum, 64-bit vectors only (no q-form in ARMv7 NEON).
// Lowers to @llvm.arm.neon.vpmaxs (signed/float) / vpmaxu (unsigned).
// Expected-IR lines are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vpmax_s8(
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPMAX_V2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}
9583
// vpmin_*: pairwise minimum, 64-bit vectors only.  Mirrors the vpmax tests:
// @llvm.arm.neon.vpmins (signed/float) / vpminu (unsigned).
// Expected-IR lines are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vpmin_s8(
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPMIN_V2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}
9647
// vqabs_* / vqabsq_*: saturating absolute value (signed only).  Lowers to
// @llvm.arm.neon.vqabs for both d- and q-register widths.
// Expected-IR lines are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vqabs_s8(
// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: @test_vqabs_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQABS_V1_I]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: @test_vqabs_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQABS_V1_I]]
int32x2_t test_vqabs_s32(int32x2_t a) {
  return vqabs_s32(a);
}

// CHECK-LABEL: @test_vqabsq_s8(
// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: @test_vqabsq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a)
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQABSQ_V1_I]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}

// CHECK-LABEL: @test_vqabsq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a)
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQABSQ_V1_I]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
  return vqabsq_s32(a);
}
9697
// vqadd_*: saturating add, 64-bit vectors.  These lower to the target-neutral
// saturating-add intrinsics @llvm.sadd.sat / @llvm.uadd.sat rather than an
// ARM-specific intrinsic.  Expected-IR lines are auto-generated; do not
// hand-edit.
// CHECK-LABEL: @test_vqadd_s8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}
9771
// vqaddq_*: saturating add, 128-bit vectors.  Same @llvm.sadd.sat /
// @llvm.uadd.sat lowering as the d-register forms above.
// Expected-IR lines are auto-generated; do not hand-edit.
// CHECK-LABEL: @test_vqaddq_s8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
9845
// vqdmlal_*: saturating doubling multiply-accumulate long.  The expected IR
// is a widening @llvm.arm.neon.vqdmull followed by a saturating accumulate
// via the generic @llvm.sadd.sat.  The _lane_ forms first splat the chosen
// lane with a shufflevector; the _n_ forms first splat the scalar via an
// insertelement chain.  Expected-IR lines are auto-generated; do not
// hand-edit.
// CHECK-LABEL: @test_vqdmlal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vqdmlal_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vqdmlal_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlal_n_s32(a, b, c);
}
9923
9924 // CHECK-LABEL: @test_vqdmlsl_s16(
9925 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9926 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9927 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9928 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
9929 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
9930 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
test_vqdmlsl_s16(int32x4_t a,int16x4_t b,int16x4_t c)9931 int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9932 return vqdmlsl_s16(a, b, c);
9933 }
9934
9935 // CHECK-LABEL: @test_vqdmlsl_s32(
9936 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9937 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9938 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9939 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
9940 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
9941 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
test_vqdmlsl_s32(int64x2_t a,int32x2_t b,int32x2_t c)9942 int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9943 return vqdmlsl_s32(a, b, c);
9944 }
9945
9946 // CHECK-LABEL: @test_vqdmlsl_lane_s16(
9947 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
9948 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9949 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
9950 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
9951 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
9952 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
9953 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
9954 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
9955 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
test_vqdmlsl_lane_s16(int32x4_t a,int16x4_t b,int16x4_t c)9956 int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9957 return vqdmlsl_lane_s16(a, b, c, 3);
9958 }
9959
9960 // CHECK-LABEL: @test_vqdmlsl_lane_s32(
9961 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
9962 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
9963 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
9964 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
9965 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
9966 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
9967 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
9968 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
9969 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
test_vqdmlsl_lane_s32(int64x2_t a,int32x2_t b,int32x2_t c)9970 int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9971 return vqdmlsl_lane_s32(a, b, c, 1);
9972 }
9973
9974 // CHECK-LABEL: @test_vqdmlsl_n_s16(
9975 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
9976 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
9977 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
9978 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
9979 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9980 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9981 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
9982 // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
9983 // CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
9984 // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]]
test_vqdmlsl_n_s16(int32x4_t a,int16x4_t b,int16_t c)9985 int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
9986 return vqdmlsl_n_s16(a, b, c);
9987 }
9988
9989 // CHECK-LABEL: @test_vqdmlsl_n_s32(
9990 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
9991 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
9992 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9993 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9994 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
9995 // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
9996 // CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
9997 // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]]
test_vqdmlsl_n_s32(int64x2_t a,int32x2_t b,int32_t c)9998 int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
9999 return vqdmlsl_n_s32(a, b, c);
10000 }
10001
10002 // CHECK-LABEL: @test_vqdmulh_s16(
10003 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10004 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10005 // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
10006 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
10007 // CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
test_vqdmulh_s16(int16x4_t a,int16x4_t b)10008 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
10009 return vqdmulh_s16(a, b);
10010 }
10011
10012 // CHECK-LABEL: @test_vqdmulh_s32(
10013 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10014 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10015 // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
10016 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
10017 // CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
test_vqdmulh_s32(int32x2_t a,int32x2_t b)10018 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
10019 return vqdmulh_s32(a, b);
10020 }
10021
10022 // CHECK-LABEL: @test_vqdmulhq_s16(
10023 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10024 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10025 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
10026 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
10027 // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s16(int16x8_t a,int16x8_t b)10028 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
10029 return vqdmulhq_s16(a, b);
10030 }
10031
10032 // CHECK-LABEL: @test_vqdmulhq_s32(
10033 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10034 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10035 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
10036 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
10037 // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s32(int32x4_t a,int32x4_t b)10038 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
10039 return vqdmulhq_s32(a, b);
10040 }
10041
10042 // CHECK-LABEL: @test_vqdmulh_lane_s16(
10043 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
10044 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10045 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
10046 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
10047 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
10048 // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
10049 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
10050 // CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
test_vqdmulh_lane_s16(int16x4_t a,int16x4_t b)10051 int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
10052 return vqdmulh_lane_s16(a, b, 3);
10053 }
10054
10055 // CHECK-LABEL: @test_vqdmulh_lane_s32(
10056 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
10057 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10058 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
10059 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
10060 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
10061 // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
10062 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
10063 // CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
test_vqdmulh_lane_s32(int32x2_t a,int32x2_t b)10064 int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
10065 return vqdmulh_lane_s32(a, b, 1);
10066 }
10067
10068 // CHECK-LABEL: @test_vqdmulhq_lane_s16(
10069 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
10070 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10071 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
10072 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
10073 // CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
10074 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
10075 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
10076 // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
test_vqdmulhq_lane_s16(int16x8_t a,int16x4_t b)10077 int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
10078 return vqdmulhq_lane_s16(a, b, 3);
10079 }
10080
10081 // CHECK-LABEL: @test_vqdmulhq_lane_s32(
10082 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
10083 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10084 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
10085 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
10086 // CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
10087 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
10088 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
10089 // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
test_vqdmulhq_lane_s32(int32x4_t a,int32x2_t b)10090 int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
10091 return vqdmulhq_lane_s32(a, b, 1);
10092 }
10093
10094 // CHECK-LABEL: @test_vqdmulh_n_s16(
10095 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
10096 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
10097 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
10098 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
10099 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10100 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
10101 // CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
10102 // CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
10103 // CHECK: ret <4 x i16> [[VQDMULH_V5_I]]
test_vqdmulh_n_s16(int16x4_t a,int16_t b)10104 int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
10105 return vqdmulh_n_s16(a, b);
10106 }
10107
10108 // CHECK-LABEL: @test_vqdmulh_n_s32(
10109 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
10110 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
10111 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10112 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
10113 // CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
10114 // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
10115 // CHECK: ret <2 x i32> [[VQDMULH_V3_I]]
test_vqdmulh_n_s32(int32x2_t a,int32_t b)10116 int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
10117 return vqdmulh_n_s32(a, b);
10118 }
10119
10120 // CHECK-LABEL: @test_vqdmulhq_n_s16(
10121 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
10122 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
10123 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
10124 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
10125 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
10126 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
10127 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
10128 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
10129 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10130 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
10131 // CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
10132 // CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
10133 // CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]]
test_vqdmulhq_n_s16(int16x8_t a,int16_t b)10134 int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
10135 return vqdmulhq_n_s16(a, b);
10136 }
10137
10138 // CHECK-LABEL: @test_vqdmulhq_n_s32(
10139 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
10140 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
10141 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
10142 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
10143 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10144 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
10145 // CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
10146 // CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
10147 // CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]]
test_vqdmulhq_n_s32(int32x4_t a,int32_t b)10148 int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
10149 return vqdmulhq_n_s32(a, b);
10150 }
10151
10152 // CHECK-LABEL: @test_vqdmull_s16(
10153 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10154 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10155 // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
10156 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
10157 // CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
test_vqdmull_s16(int16x4_t a,int16x4_t b)10158 int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
10159 return vqdmull_s16(a, b);
10160 }
10161
10162 // CHECK-LABEL: @test_vqdmull_s32(
10163 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10164 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10165 // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
10166 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
10167 // CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
test_vqdmull_s32(int32x2_t a,int32x2_t b)10168 int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
10169 return vqdmull_s32(a, b);
10170 }
10171
10172 // CHECK-LABEL: @test_vqdmull_lane_s16(
10173 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
10174 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10175 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
10176 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
10177 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
10178 // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
10179 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
10180 // CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
test_vqdmull_lane_s16(int16x4_t a,int16x4_t b)10181 int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
10182 return vqdmull_lane_s16(a, b, 3);
10183 }
10184
10185 // CHECK-LABEL: @test_vqdmull_lane_s32(
10186 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
10187 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10188 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
10189 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
10190 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
10191 // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
10192 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
10193 // CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
test_vqdmull_lane_s32(int32x2_t a,int32x2_t b)10194 int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
10195 return vqdmull_lane_s32(a, b, 1);
10196 }
10197
10198 // CHECK-LABEL: @test_vqdmull_n_s16(
10199 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
10200 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
10201 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
10202 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
10203 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10204 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
10205 // CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
10206 // CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
10207 // CHECK: ret <4 x i32> [[VQDMULL_V5_I]]
test_vqdmull_n_s16(int16x4_t a,int16_t b)10208 int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
10209 return vqdmull_n_s16(a, b);
10210 }
10211
10212 // CHECK-LABEL: @test_vqdmull_n_s32(
10213 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
10214 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
10215 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10216 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
10217 // CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
10218 // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
10219 // CHECK: ret <2 x i64> [[VQDMULL_V3_I]]
test_vqdmull_n_s32(int32x2_t a,int32_t b)10220 int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
10221 return vqdmull_n_s32(a, b);
10222 }
10223
10224 // CHECK-LABEL: @test_vqmovn_s16(
10225 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10226 // CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a)
10227 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
test_vqmovn_s16(int16x8_t a)10228 int8x8_t test_vqmovn_s16(int16x8_t a) {
10229 return vqmovn_s16(a);
10230 }
10231
10232 // CHECK-LABEL: @test_vqmovn_s32(
10233 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10234 // CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a)
10235 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
10236 // CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
test_vqmovn_s32(int32x4_t a)10237 int16x4_t test_vqmovn_s32(int32x4_t a) {
10238 return vqmovn_s32(a);
10239 }
10240
10241 // CHECK-LABEL: @test_vqmovn_s64(
10242 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
10243 // CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a)
10244 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
10245 // CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
test_vqmovn_s64(int64x2_t a)10246 int32x2_t test_vqmovn_s64(int64x2_t a) {
10247 return vqmovn_s64(a);
10248 }
10249
10250 // CHECK-LABEL: @test_vqmovn_u16(
10251 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10252 // CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a)
10253 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
test_vqmovn_u16(uint16x8_t a)10254 uint8x8_t test_vqmovn_u16(uint16x8_t a) {
10255 return vqmovn_u16(a);
10256 }
10257
10258 // CHECK-LABEL: @test_vqmovn_u32(
10259 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10260 // CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a)
10261 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
10262 // CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
test_vqmovn_u32(uint32x4_t a)10263 uint16x4_t test_vqmovn_u32(uint32x4_t a) {
10264 return vqmovn_u32(a);
10265 }
10266
10267 // CHECK-LABEL: @test_vqmovn_u64(
10268 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
10269 // CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a)
10270 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
10271 // CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
test_vqmovn_u64(uint64x2_t a)10272 uint32x2_t test_vqmovn_u64(uint64x2_t a) {
10273 return vqmovn_u64(a);
10274 }
10275
10276 // CHECK-LABEL: @test_vqmovun_s16(
10277 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10278 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a)
10279 // CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
test_vqmovun_s16(int16x8_t a)10280 uint8x8_t test_vqmovun_s16(int16x8_t a) {
10281 return vqmovun_s16(a);
10282 }
10283
10284 // CHECK-LABEL: @test_vqmovun_s32(
10285 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10286 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a)
10287 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
10288 // CHECK: ret <4 x i16> [[VQMOVUN_V1_I]]
test_vqmovun_s32(int32x4_t a)10289 uint16x4_t test_vqmovun_s32(int32x4_t a) {
10290 return vqmovun_s32(a);
10291 }
10292
10293 // CHECK-LABEL: @test_vqmovun_s64(
10294 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
10295 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a)
10296 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
10297 // CHECK: ret <2 x i32> [[VQMOVUN_V1_I]]
test_vqmovun_s64(int64x2_t a)10298 uint32x2_t test_vqmovun_s64(int64x2_t a) {
10299 return vqmovun_s64(a);
10300 }
10301
10302 // CHECK-LABEL: @test_vqneg_s8(
10303 // CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a)
10304 // CHECK: ret <8 x i8> [[VQNEG_V_I]]
test_vqneg_s8(int8x8_t a)10305 int8x8_t test_vqneg_s8(int8x8_t a) {
10306 return vqneg_s8(a);
10307 }
10308
10309 // CHECK-LABEL: @test_vqneg_s16(
10310 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10311 // CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a)
10312 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
10313 // CHECK: ret <4 x i16> [[VQNEG_V1_I]]
test_vqneg_s16(int16x4_t a)10314 int16x4_t test_vqneg_s16(int16x4_t a) {
10315 return vqneg_s16(a);
10316 }
10317
10318 // CHECK-LABEL: @test_vqneg_s32(
10319 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10320 // CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a)
10321 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
10322 // CHECK: ret <2 x i32> [[VQNEG_V1_I]]
test_vqneg_s32(int32x2_t a)10323 int32x2_t test_vqneg_s32(int32x2_t a) {
10324 return vqneg_s32(a);
10325 }
10326
10327 // CHECK-LABEL: @test_vqnegq_s8(
10328 // CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a)
10329 // CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
test_vqnegq_s8(int8x16_t a)10330 int8x16_t test_vqnegq_s8(int8x16_t a) {
10331 return vqnegq_s8(a);
10332 }
10333
10334 // CHECK-LABEL: @test_vqnegq_s16(
10335 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10336 // CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a)
10337 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
10338 // CHECK: ret <8 x i16> [[VQNEGQ_V1_I]]
test_vqnegq_s16(int16x8_t a)10339 int16x8_t test_vqnegq_s16(int16x8_t a) {
10340 return vqnegq_s16(a);
10341 }
10342
10343 // CHECK-LABEL: @test_vqnegq_s32(
10344 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10345 // CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a)
10346 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
10347 // CHECK: ret <4 x i32> [[VQNEGQ_V1_I]]
test_vqnegq_s32(int32x4_t a)10348 int32x4_t test_vqnegq_s32(int32x4_t a) {
10349 return vqnegq_s32(a);
10350 }
10351
10352 // CHECK-LABEL: @test_vqrdmulh_s16(
10353 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10354 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10355 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
10356 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
10357 // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
test_vqrdmulh_s16(int16x4_t a,int16x4_t b)10358 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
10359 return vqrdmulh_s16(a, b);
10360 }
10361
10362 // CHECK-LABEL: @test_vqrdmulh_s32(
10363 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10364 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10365 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
10366 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
10367 // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
test_vqrdmulh_s32(int32x2_t a,int32x2_t b)10368 int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
10369 return vqrdmulh_s32(a, b);
10370 }
10371
10372 // CHECK-LABEL: @test_vqrdmulhq_s16(
10373 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10374 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10375 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
10376 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
10377 // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s16(int16x8_t a,int16x8_t b)10378 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
10379 return vqrdmulhq_s16(a, b);
10380 }
10381
10382 // CHECK-LABEL: @test_vqrdmulhq_s32(
10383 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10384 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10385 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
10386 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
10387 // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_s32(int32x4_t a,int32x4_t b)10388 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
10389 return vqrdmulhq_s32(a, b);
10390 }
10391
10392 // CHECK-LABEL: @test_vqrdmulh_lane_s16(
10393 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
10394 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10395 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
10396 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
10397 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
10398 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
10399 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
10400 // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
test_vqrdmulh_lane_s16(int16x4_t a,int16x4_t b)10401 int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
10402 return vqrdmulh_lane_s16(a, b, 3);
10403 }
10404
10405 // CHECK-LABEL: @test_vqrdmulh_lane_s32(
10406 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
10407 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10408 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
10409 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
10410 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
10411 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
10412 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
10413 // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
test_vqrdmulh_lane_s32(int32x2_t a,int32x2_t b)10414 int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
10415 return vqrdmulh_lane_s32(a, b, 1);
10416 }
10417
10418 // CHECK-LABEL: @test_vqrdmulhq_lane_s16(
10419 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
10420 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10421 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
10422 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
10423 // CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
10424 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
10425 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
10426 // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_lane_s16(int16x8_t a,int16x4_t b)10427 int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
10428 return vqrdmulhq_lane_s16(a, b, 3);
10429 }
10430
10431 // CHECK-LABEL: @test_vqrdmulhq_lane_s32(
10432 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
10433 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10434 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
10435 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
10436 // CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
10437 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
10438 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
10439 // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
test_vqrdmulhq_lane_s32(int32x4_t a,int32x2_t b)10440 int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
10441 return vqrdmulhq_lane_s32(a, b, 1);
10442 }
10443
// vqrdmulh_n / vqrdmulhq_n: the scalar operand %b is broadcast to a full
// vector via a chain of insertelement instructions (one per lane) before
// the llvm.arm.neon.vqrdmulh intrinsic call.
// CHECK-LABEL: @test_vqrdmulh_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V5_I]]
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqrdmulh_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V3_I]]
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqrdmulh_n_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]]
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqrdmulhq_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]]
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqrdmulhq_n_s32(a, b);
}
10501
// vqrshl (64-bit d-register forms): saturating rounding shift left by a
// per-lane signed shift vector. Signed element types lower to
// llvm.arm.neon.vqrshifts.*, unsigned to llvm.arm.neon.vqrshiftu.*; the
// shift-amount operand %b is a signed vector in both cases.
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}
10575
// vqrshlq (128-bit q-register forms): same lowering as the d-register
// vqrshl tests above but on the wider vector types
// (llvm.arm.neon.vqrshifts.* for signed, .vqrshiftu.* for unsigned).
// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
10649
// vqrshrn_n (saturating rounding narrowing shift right by immediate):
// clang encodes the right-shift amount n as a splat of -n in the second
// operand of llvm.arm.neon.vqrshiftns (signed) / vqrshiftnu (unsigned) —
// hence the <... -1 ...> splats below for a shift of 1.
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}
10703
// vqrshrun_n (signed input, unsigned saturating narrowing result): lowers
// to llvm.arm.neon.vqrshiftnsu with the shift encoded as a splat of -n,
// matching the vqrshrn_n convention above.
// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}
10730
// vqshl (64-bit d-register forms, no rounding): saturating shift left by a
// per-lane signed shift vector. Signed element types lower to
// llvm.arm.neon.vqshifts.*, unsigned to llvm.arm.neon.vqshiftu.*.
// CHECK-LABEL: @test_vqshl_s8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: @test_vqshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: @test_vqshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: @test_vqshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: @test_vqshl_u8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: @test_vqshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: @test_vqshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: @test_vqshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}
10804
// vqshlq (128-bit q-register forms): same lowering as the d-register
// vqshl tests above on the wider vector types.
// CHECK-LABEL: @test_vqshlq_s8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
10878
// vqshlu_n (signed input, unsigned saturating shift left by immediate,
// d-register): lowers to llvm.arm.neon.vqshiftsu with the immediate
// splatted as a positive shift vector (contrast with the negative splat
// used by the right-shift vqrshrn_n family).
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}
10912
// vqshluq_n: q-register variants of vqshlu_n above; same
// llvm.arm.neon.vqshiftsu lowering with a positive immediate splat.
// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 1);
}
10946
// vqshl_n (saturating shift left by immediate, d-register): the immediate
// is splatted into the second operand of llvm.arm.neon.vqshifts (signed)
// or llvm.arm.neon.vqshiftu (unsigned) — same intrinsics as the
// by-vector vqshl forms, just with a constant shift vector.
// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}
11014
// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
// Quad-register signed saturating shift-left by immediate (vqshifts).
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 1);
}
11021
// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
// Quad-register signed saturating shift-left by immediate, 16-bit lanes.
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 1);
}
11030
// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
// Quad-register signed saturating shift-left by immediate, 32-bit lanes.
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 1);
}
11039
// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
// Quad-register signed saturating shift-left by immediate, 64-bit lanes.
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 1);
}
11048
// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
// Quad-register unsigned saturating shift-left by immediate (vqshiftu).
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 1);
}
11055
// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
// Quad-register unsigned saturating shift-left by immediate, 16-bit lanes.
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 1);
}
11064
// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
// Quad-register unsigned saturating shift-left by immediate, 32-bit lanes.
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 1);
}
11073
// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
// Quad-register unsigned saturating shift-left by immediate, 64-bit lanes.
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 1);
}
11082
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
// Signed saturating shift-right narrow: the right shift by 1 is encoded as
// a splat of -1 in the vqshiftns shift operand (negative = right shift).
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 1);
}
11091
// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
// Signed saturating shift-right narrow, 32-bit to 16-bit lanes.
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 1);
}
11100
// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
// Signed saturating shift-right narrow, 64-bit to 32-bit lanes.
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 1);
}
11109
// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
// Unsigned saturating shift-right narrow (vqshiftnu), 16-bit to 8-bit lanes.
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 1);
}
11118
// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
// Unsigned saturating shift-right narrow, 32-bit to 16-bit lanes.
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 1);
}
11127
// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
// Unsigned saturating shift-right narrow, 64-bit to 32-bit lanes.
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 1);
}
11136
// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
// Signed-input, unsigned-result saturating shift-right narrow (vqshiftnsu).
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 1);
}
11145
// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
// Signed-to-unsigned saturating shift-right narrow, 32-bit to 16-bit lanes.
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 1);
}
11154
// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
// Signed-to-unsigned saturating shift-right narrow, 64-bit to 32-bit lanes.
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 1);
}
11163
// CHECK-LABEL: @test_vqsub_s8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
// vqsub now lowers to the target-independent saturating-subtract intrinsic
// (@llvm.ssub.sat) rather than an arm.neon-specific one.
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}
11170
// CHECK-LABEL: @test_vqsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
// Signed saturating subtract, 16-bit lanes, via @llvm.ssub.sat.
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}
11180
// CHECK-LABEL: @test_vqsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
// Signed saturating subtract, 32-bit lanes, via @llvm.ssub.sat.
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}
11190
// CHECK-LABEL: @test_vqsub_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
// Signed saturating subtract, single 64-bit lane, via @llvm.ssub.sat.
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}
11200
// CHECK-LABEL: @test_vqsub_u8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
// Unsigned saturating subtract via the generic @llvm.usub.sat intrinsic.
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}
11207
// CHECK-LABEL: @test_vqsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
// Unsigned saturating subtract, 16-bit lanes.
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}
11217
// CHECK-LABEL: @test_vqsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
// Unsigned saturating subtract, 32-bit lanes.
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}
11227
// CHECK-LABEL: @test_vqsub_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
// Unsigned saturating subtract, single 64-bit lane.
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}
11237
// CHECK-LABEL: @test_vqsubq_s8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
// Quad-register signed saturating subtract via @llvm.ssub.sat.
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}
11244
// CHECK-LABEL: @test_vqsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
// Quad-register signed saturating subtract, 16-bit lanes.
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}
11254
// CHECK-LABEL: @test_vqsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
// Quad-register signed saturating subtract, 32-bit lanes.
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}
11264
// CHECK-LABEL: @test_vqsubq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
// Quad-register signed saturating subtract, 64-bit lanes.
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}
11274
// CHECK-LABEL: @test_vqsubq_u8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
// Quad-register unsigned saturating subtract via @llvm.usub.sat.
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}
11281
// CHECK-LABEL: @test_vqsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
// Quad-register unsigned saturating subtract, 16-bit lanes.
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}
11291
// CHECK-LABEL: @test_vqsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
// Quad-register unsigned saturating subtract, 32-bit lanes.
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}
11301
// CHECK-LABEL: @test_vqsubq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
// Quad-register unsigned saturating subtract, 64-bit lanes.
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}
11311
// CHECK-LABEL: @test_vraddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
// Rounding add and narrow to the high half of each lane (vraddhn).
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}
11320
// CHECK-LABEL: @test_vraddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
// Rounding add-high-half narrow, 32-bit to 16-bit lanes.
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}
11330
// CHECK-LABEL: @test_vraddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
// Rounding add-high-half narrow, 64-bit to 32-bit lanes.
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}
11340
// CHECK-LABEL: @test_vraddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
// Unsigned variant maps to the same sign-agnostic vraddhn intrinsic.
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}
11349
// CHECK-LABEL: @test_vraddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
// Unsigned rounding add-high-half narrow, 32-bit to 16-bit lanes.
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}
11359
// CHECK-LABEL: @test_vraddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
// Unsigned rounding add-high-half narrow, 64-bit to 32-bit lanes.
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}
11369
// CHECK-LABEL: @test_vrecpe_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRECPE_V1_I]]
// Floating-point reciprocal estimate (vrecpe).
float32x2_t test_vrecpe_f32(float32x2_t a) {
  return vrecpe_f32(a);
}
11377
// CHECK-LABEL: @test_vrecpe_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
// Unsigned fixed-point reciprocal estimate.
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
  return vrecpe_u32(a);
}
11385
// CHECK-LABEL: @test_vrecpeq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
// Quad-register floating-point reciprocal estimate.
float32x4_t test_vrecpeq_f32(float32x4_t a) {
  return vrecpeq_f32(a);
}
11393
// CHECK-LABEL: @test_vrecpeq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
// Quad-register unsigned reciprocal estimate.
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
  return vrecpeq_u32(a);
}
11401
// CHECK-LABEL: @test_vrecps_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRECPS_V2_I]]
// Newton-Raphson reciprocal refinement step (vrecps).
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
  return vrecps_f32(a, b);
}
11411
// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
// Quad-register Newton-Raphson reciprocal refinement step.
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
  return vrecpsq_f32(a, b);
}
11421
// CHECK-LABEL: @test_vreinterpret_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// vreinterpret is a pure bit reinterpretation: a single IR bitcast, no
// machine instruction.
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}
11428
// CHECK-LABEL: @test_vreinterpret_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}
11435
// CHECK-LABEL: @test_vreinterpret_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}
11442
// CHECK-LABEL: @test_vreinterpret_s8_u8(
// CHECK: ret <8 x i8> %a
// Same IR element type on both sides, so no instruction at all: the input
// is returned unchanged.
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}
11448
// CHECK-LABEL: @test_vreinterpret_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}
11455
// CHECK-LABEL: @test_vreinterpret_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}
11462
// CHECK-LABEL: @test_vreinterpret_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}
11469
// CHECK-LABEL: @test_vreinterpret_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation from half-precision float lanes; single IR bitcast.
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}
11476
// CHECK-LABEL: @test_vreinterpret_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation from float lanes; single IR bitcast.
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}
11483
// CHECK-LABEL: @test_vreinterpret_s8_p8(
// CHECK: ret <8 x i8> %a
// Polynomial and signed 8-bit vectors share the same IR type: no-op.
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}
11489
// CHECK-LABEL: @test_vreinterpret_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
// Bit reinterpretation from polynomial 16-bit lanes; single IR bitcast.
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}
11496
// CHECK-LABEL: @test_vreinterpret_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}
11503
// CHECK-LABEL: @test_vreinterpret_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}
11510
// CHECK-LABEL: @test_vreinterpret_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}
11517
// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}
11524
// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK: ret <4 x i16> %a
// Signed and unsigned 16-bit vectors share the same IR type: no-op.
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}
11530
// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}
11537
// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}
11544
// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation from half-precision float lanes; single IR bitcast.
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}
11551
// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation from float lanes; single IR bitcast.
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}
11558
// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
// Bit reinterpretation from polynomial 8-bit lanes; single IR bitcast.
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}
11565
// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK: ret <4 x i16> %a
// Polynomial and signed 16-bit vectors share the same IR type: no-op.
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}
11571
// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}
11578
// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}
11585
// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}
11592
// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}
11599
// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}
11606
// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK: ret <2 x i32> %a
// Signed and unsigned 32-bit vectors share the same IR type: no-op.
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}
11612
// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}
11619
// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation from half-precision float lanes; single IR bitcast.
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}
11626
// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation from float lanes; single IR bitcast.
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}
11633
// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation from polynomial 8-bit lanes; single IR bitcast.
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}
11640
// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
// Bit reinterpretation from polynomial 16-bit lanes; single IR bitcast.
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}
11647
// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}
11654
// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}
11661
// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
// Bit reinterpretation only; single IR bitcast.
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}
11668
11669 // CHECK-LABEL: @test_vreinterpret_s64_u8(
11670 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11671 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u8(uint8x8_t a)11672 int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
11673 return vreinterpret_s64_u8(a);
11674 }
11675
11676 // CHECK-LABEL: @test_vreinterpret_s64_u16(
11677 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11678 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u16(uint16x4_t a)11679 int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
11680 return vreinterpret_s64_u16(a);
11681 }
11682
11683 // CHECK-LABEL: @test_vreinterpret_s64_u32(
11684 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11685 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u32(uint32x2_t a)11686 int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
11687 return vreinterpret_s64_u32(a);
11688 }
11689
11690 // CHECK-LABEL: @test_vreinterpret_s64_u64(
11691 // CHECK: ret <1 x i64> %a
test_vreinterpret_s64_u64(uint64x1_t a)11692 int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
11693 return vreinterpret_s64_u64(a);
11694 }
11695
11696 // CHECK-LABEL: @test_vreinterpret_s64_f16(
11697 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
11698 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f16(float16x4_t a)11699 int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
11700 return vreinterpret_s64_f16(a);
11701 }
11702
11703 // CHECK-LABEL: @test_vreinterpret_s64_f32(
11704 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
11705 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f32(float32x2_t a)11706 int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
11707 return vreinterpret_s64_f32(a);
11708 }
11709
11710 // CHECK-LABEL: @test_vreinterpret_s64_p8(
11711 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11712 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p8(poly8x8_t a)11713 int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
11714 return vreinterpret_s64_p8(a);
11715 }
11716
11717 // CHECK-LABEL: @test_vreinterpret_s64_p16(
11718 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11719 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p16(poly16x4_t a)11720 int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
11721 return vreinterpret_s64_p16(a);
11722 }
11723
11724 // CHECK-LABEL: @test_vreinterpret_u8_s8(
11725 // CHECK: ret <8 x i8> %a
test_vreinterpret_u8_s8(int8x8_t a)11726 uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
11727 return vreinterpret_u8_s8(a);
11728 }
11729
11730 // CHECK-LABEL: @test_vreinterpret_u8_s16(
11731 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11732 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s16(int16x4_t a)11733 uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
11734 return vreinterpret_u8_s16(a);
11735 }
11736
11737 // CHECK-LABEL: @test_vreinterpret_u8_s32(
11738 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11739 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s32(int32x2_t a)11740 uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
11741 return vreinterpret_u8_s32(a);
11742 }
11743
11744 // CHECK-LABEL: @test_vreinterpret_u8_s64(
11745 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11746 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s64(int64x1_t a)11747 uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
11748 return vreinterpret_u8_s64(a);
11749 }
11750
11751 // CHECK-LABEL: @test_vreinterpret_u8_u16(
11752 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11753 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u16(uint16x4_t a)11754 uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
11755 return vreinterpret_u8_u16(a);
11756 }
11757
11758 // CHECK-LABEL: @test_vreinterpret_u8_u32(
11759 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11760 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u32(uint32x2_t a)11761 uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
11762 return vreinterpret_u8_u32(a);
11763 }
11764
11765 // CHECK-LABEL: @test_vreinterpret_u8_u64(
11766 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11767 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u64(uint64x1_t a)11768 uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
11769 return vreinterpret_u8_u64(a);
11770 }
11771
11772 // CHECK-LABEL: @test_vreinterpret_u8_f16(
11773 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
11774 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f16(float16x4_t a)11775 uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
11776 return vreinterpret_u8_f16(a);
11777 }
11778
11779 // CHECK-LABEL: @test_vreinterpret_u8_f32(
11780 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
11781 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f32(float32x2_t a)11782 uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
11783 return vreinterpret_u8_f32(a);
11784 }
11785
11786 // CHECK-LABEL: @test_vreinterpret_u8_p8(
11787 // CHECK: ret <8 x i8> %a
test_vreinterpret_u8_p8(poly8x8_t a)11788 uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
11789 return vreinterpret_u8_p8(a);
11790 }
11791
11792 // CHECK-LABEL: @test_vreinterpret_u8_p16(
11793 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11794 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_p16(poly16x4_t a)11795 uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
11796 return vreinterpret_u8_p16(a);
11797 }
11798
11799 // CHECK-LABEL: @test_vreinterpret_u16_s8(
11800 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11801 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s8(int8x8_t a)11802 uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
11803 return vreinterpret_u16_s8(a);
11804 }
11805
11806 // CHECK-LABEL: @test_vreinterpret_u16_s16(
11807 // CHECK: ret <4 x i16> %a
test_vreinterpret_u16_s16(int16x4_t a)11808 uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
11809 return vreinterpret_u16_s16(a);
11810 }
11811
11812 // CHECK-LABEL: @test_vreinterpret_u16_s32(
11813 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11814 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s32(int32x2_t a)11815 uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
11816 return vreinterpret_u16_s32(a);
11817 }
11818
11819 // CHECK-LABEL: @test_vreinterpret_u16_s64(
11820 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11821 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s64(int64x1_t a)11822 uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
11823 return vreinterpret_u16_s64(a);
11824 }
11825
11826 // CHECK-LABEL: @test_vreinterpret_u16_u8(
11827 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11828 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u8(uint8x8_t a)11829 uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
11830 return vreinterpret_u16_u8(a);
11831 }
11832
11833 // CHECK-LABEL: @test_vreinterpret_u16_u32(
11834 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11835 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u32(uint32x2_t a)11836 uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
11837 return vreinterpret_u16_u32(a);
11838 }
11839
11840 // CHECK-LABEL: @test_vreinterpret_u16_u64(
11841 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11842 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u64(uint64x1_t a)11843 uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
11844 return vreinterpret_u16_u64(a);
11845 }
11846
11847 // CHECK-LABEL: @test_vreinterpret_u16_f16(
11848 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
11849 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f16(float16x4_t a)11850 uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
11851 return vreinterpret_u16_f16(a);
11852 }
11853
11854 // CHECK-LABEL: @test_vreinterpret_u16_f32(
11855 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
11856 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f32(float32x2_t a)11857 uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
11858 return vreinterpret_u16_f32(a);
11859 }
11860
11861 // CHECK-LABEL: @test_vreinterpret_u16_p8(
11862 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11863 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_p8(poly8x8_t a)11864 uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
11865 return vreinterpret_u16_p8(a);
11866 }
11867
11868 // CHECK-LABEL: @test_vreinterpret_u16_p16(
11869 // CHECK: ret <4 x i16> %a
test_vreinterpret_u16_p16(poly16x4_t a)11870 uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
11871 return vreinterpret_u16_p16(a);
11872 }
11873
11874 // CHECK-LABEL: @test_vreinterpret_u32_s8(
11875 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11876 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s8(int8x8_t a)11877 uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
11878 return vreinterpret_u32_s8(a);
11879 }
11880
11881 // CHECK-LABEL: @test_vreinterpret_u32_s16(
11882 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11883 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s16(int16x4_t a)11884 uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
11885 return vreinterpret_u32_s16(a);
11886 }
11887
11888 // CHECK-LABEL: @test_vreinterpret_u32_s32(
11889 // CHECK: ret <2 x i32> %a
test_vreinterpret_u32_s32(int32x2_t a)11890 uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
11891 return vreinterpret_u32_s32(a);
11892 }
11893
11894 // CHECK-LABEL: @test_vreinterpret_u32_s64(
11895 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11896 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s64(int64x1_t a)11897 uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
11898 return vreinterpret_u32_s64(a);
11899 }
11900
11901 // CHECK-LABEL: @test_vreinterpret_u32_u8(
11902 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11903 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u8(uint8x8_t a)11904 uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
11905 return vreinterpret_u32_u8(a);
11906 }
11907
11908 // CHECK-LABEL: @test_vreinterpret_u32_u16(
11909 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11910 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u16(uint16x4_t a)11911 uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
11912 return vreinterpret_u32_u16(a);
11913 }
11914
11915 // CHECK-LABEL: @test_vreinterpret_u32_u64(
11916 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11917 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u64(uint64x1_t a)11918 uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
11919 return vreinterpret_u32_u64(a);
11920 }
11921
11922 // CHECK-LABEL: @test_vreinterpret_u32_f16(
11923 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
11924 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f16(float16x4_t a)11925 uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
11926 return vreinterpret_u32_f16(a);
11927 }
11928
11929 // CHECK-LABEL: @test_vreinterpret_u32_f32(
11930 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
11931 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f32(float32x2_t a)11932 uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
11933 return vreinterpret_u32_f32(a);
11934 }
11935
11936 // CHECK-LABEL: @test_vreinterpret_u32_p8(
11937 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11938 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p8(poly8x8_t a)11939 uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
11940 return vreinterpret_u32_p8(a);
11941 }
11942
11943 // CHECK-LABEL: @test_vreinterpret_u32_p16(
11944 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11945 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p16(poly16x4_t a)11946 uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
11947 return vreinterpret_u32_p16(a);
11948 }
11949
11950 // CHECK-LABEL: @test_vreinterpret_u64_s8(
11951 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11952 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s8(int8x8_t a)11953 uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
11954 return vreinterpret_u64_s8(a);
11955 }
11956
11957 // CHECK-LABEL: @test_vreinterpret_u64_s16(
11958 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11959 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s16(int16x4_t a)11960 uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
11961 return vreinterpret_u64_s16(a);
11962 }
11963
11964 // CHECK-LABEL: @test_vreinterpret_u64_s32(
11965 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11966 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s32(int32x2_t a)11967 uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
11968 return vreinterpret_u64_s32(a);
11969 }
11970
11971 // CHECK-LABEL: @test_vreinterpret_u64_s64(
11972 // CHECK: ret <1 x i64> %a
test_vreinterpret_u64_s64(int64x1_t a)11973 uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
11974 return vreinterpret_u64_s64(a);
11975 }
11976
11977 // CHECK-LABEL: @test_vreinterpret_u64_u8(
11978 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11979 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u8(uint8x8_t a)11980 uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
11981 return vreinterpret_u64_u8(a);
11982 }
11983
11984 // CHECK-LABEL: @test_vreinterpret_u64_u16(
11985 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11986 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u16(uint16x4_t a)11987 uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
11988 return vreinterpret_u64_u16(a);
11989 }
11990
11991 // CHECK-LABEL: @test_vreinterpret_u64_u32(
11992 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11993 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u32(uint32x2_t a)11994 uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
11995 return vreinterpret_u64_u32(a);
11996 }
11997
11998 // CHECK-LABEL: @test_vreinterpret_u64_f16(
11999 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
12000 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f16(float16x4_t a)12001 uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
12002 return vreinterpret_u64_f16(a);
12003 }
12004
12005 // CHECK-LABEL: @test_vreinterpret_u64_f32(
12006 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
12007 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f32(float32x2_t a)12008 uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
12009 return vreinterpret_u64_f32(a);
12010 }
12011
12012 // CHECK-LABEL: @test_vreinterpret_u64_p8(
12013 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
12014 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p8(poly8x8_t a)12015 uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
12016 return vreinterpret_u64_p8(a);
12017 }
12018
12019 // CHECK-LABEL: @test_vreinterpret_u64_p16(
12020 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
12021 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p16(poly16x4_t a)12022 uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
12023 return vreinterpret_u64_p16(a);
12024 }
12025
12026 // CHECK-LABEL: @test_vreinterpret_f16_s8(
12027 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12028 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s8(int8x8_t a)12029 float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
12030 return vreinterpret_f16_s8(a);
12031 }
12032
12033 // CHECK-LABEL: @test_vreinterpret_f16_s16(
12034 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12035 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s16(int16x4_t a)12036 float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
12037 return vreinterpret_f16_s16(a);
12038 }
12039
12040 // CHECK-LABEL: @test_vreinterpret_f16_s32(
12041 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
12042 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s32(int32x2_t a)12043 float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
12044 return vreinterpret_f16_s32(a);
12045 }
12046
12047 // CHECK-LABEL: @test_vreinterpret_f16_s64(
12048 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
12049 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s64(int64x1_t a)12050 float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
12051 return vreinterpret_f16_s64(a);
12052 }
12053
12054 // CHECK-LABEL: @test_vreinterpret_f16_u8(
12055 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12056 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u8(uint8x8_t a)12057 float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
12058 return vreinterpret_f16_u8(a);
12059 }
12060
12061 // CHECK-LABEL: @test_vreinterpret_f16_u16(
12062 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12063 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u16(uint16x4_t a)12064 float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
12065 return vreinterpret_f16_u16(a);
12066 }
12067
12068 // CHECK-LABEL: @test_vreinterpret_f16_u32(
12069 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
12070 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u32(uint32x2_t a)12071 float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
12072 return vreinterpret_f16_u32(a);
12073 }
12074
12075 // CHECK-LABEL: @test_vreinterpret_f16_u64(
12076 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
12077 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u64(uint64x1_t a)12078 float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
12079 return vreinterpret_f16_u64(a);
12080 }
12081
12082 // CHECK-LABEL: @test_vreinterpret_f16_f32(
12083 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
12084 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_f32(float32x2_t a)12085 float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
12086 return vreinterpret_f16_f32(a);
12087 }
12088
12089 // CHECK-LABEL: @test_vreinterpret_f16_p8(
12090 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12091 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_p8(poly8x8_t a)12092 float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
12093 return vreinterpret_f16_p8(a);
12094 }
12095
12096 // CHECK-LABEL: @test_vreinterpret_f16_p16(
12097 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12098 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_p16(poly16x4_t a)12099 float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
12100 return vreinterpret_f16_p16(a);
12101 }
12102
12103 // CHECK-LABEL: @test_vreinterpret_f32_s8(
12104 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12105 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s8(int8x8_t a)12106 float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
12107 return vreinterpret_f32_s8(a);
12108 }
12109
12110 // CHECK-LABEL: @test_vreinterpret_f32_s16(
12111 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12112 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s16(int16x4_t a)12113 float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
12114 return vreinterpret_f32_s16(a);
12115 }
12116
12117 // CHECK-LABEL: @test_vreinterpret_f32_s32(
12118 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
12119 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s32(int32x2_t a)12120 float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
12121 return vreinterpret_f32_s32(a);
12122 }
12123
12124 // CHECK-LABEL: @test_vreinterpret_f32_s64(
12125 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
12126 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s64(int64x1_t a)12127 float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
12128 return vreinterpret_f32_s64(a);
12129 }
12130
12131 // CHECK-LABEL: @test_vreinterpret_f32_u8(
12132 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12133 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u8(uint8x8_t a)12134 float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
12135 return vreinterpret_f32_u8(a);
12136 }
12137
12138 // CHECK-LABEL: @test_vreinterpret_f32_u16(
12139 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12140 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u16(uint16x4_t a)12141 float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
12142 return vreinterpret_f32_u16(a);
12143 }
12144
12145 // CHECK-LABEL: @test_vreinterpret_f32_u32(
12146 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
12147 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u32(uint32x2_t a)12148 float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
12149 return vreinterpret_f32_u32(a);
12150 }
12151
12152 // CHECK-LABEL: @test_vreinterpret_f32_u64(
12153 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
12154 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u64(uint64x1_t a)12155 float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
12156 return vreinterpret_f32_u64(a);
12157 }
12158
12159 // CHECK-LABEL: @test_vreinterpret_f32_f16(
12160 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
12161 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_f16(float16x4_t a)12162 float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
12163 return vreinterpret_f32_f16(a);
12164 }
12165
12166 // CHECK-LABEL: @test_vreinterpret_f32_p8(
12167 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12168 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p8(poly8x8_t a)12169 float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
12170 return vreinterpret_f32_p8(a);
12171 }
12172
12173 // CHECK-LABEL: @test_vreinterpret_f32_p16(
12174 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12175 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p16(poly16x4_t a)12176 float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
12177 return vreinterpret_f32_p16(a);
12178 }
12179
12180 // CHECK-LABEL: @test_vreinterpret_p8_s8(
12181 // CHECK: ret <8 x i8> %a
test_vreinterpret_p8_s8(int8x8_t a)12182 poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
12183 return vreinterpret_p8_s8(a);
12184 }
12185
12186 // CHECK-LABEL: @test_vreinterpret_p8_s16(
12187 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
12188 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s16(int16x4_t a)12189 poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
12190 return vreinterpret_p8_s16(a);
12191 }
12192
12193 // CHECK-LABEL: @test_vreinterpret_p8_s32(
12194 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
12195 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s32(int32x2_t a)12196 poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
12197 return vreinterpret_p8_s32(a);
12198 }
12199
12200 // CHECK-LABEL: @test_vreinterpret_p8_s64(
12201 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
12202 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s64(int64x1_t a)12203 poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
12204 return vreinterpret_p8_s64(a);
12205 }
12206
12207 // CHECK-LABEL: @test_vreinterpret_p8_u8(
12208 // CHECK: ret <8 x i8> %a
test_vreinterpret_p8_u8(uint8x8_t a)12209 poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
12210 return vreinterpret_p8_u8(a);
12211 }
12212
12213 // CHECK-LABEL: @test_vreinterpret_p8_u16(
12214 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
12215 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_u16(uint16x4_t a)12216 poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
12217 return vreinterpret_p8_u16(a);
12218 }
12219
12220 // CHECK-LABEL: @test_vreinterpret_p8_u32(
12221 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
12222 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_u32(uint32x2_t a)12223 poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
12224 return vreinterpret_p8_u32(a);
12225 }
12226
12227 // CHECK-LABEL: @test_vreinterpret_p8_u64(
12228 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
12229 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_u64(uint64x1_t a)12230 poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
12231 return vreinterpret_p8_u64(a);
12232 }
12233
12234 // CHECK-LABEL: @test_vreinterpret_p8_f16(
12235 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
12236 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_f16(float16x4_t a)12237 poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
12238 return vreinterpret_p8_f16(a);
12239 }
12240
12241 // CHECK-LABEL: @test_vreinterpret_p8_f32(
12242 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
12243 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_f32(float32x2_t a)12244 poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
12245 return vreinterpret_p8_f32(a);
12246 }
12247
12248 // CHECK-LABEL: @test_vreinterpret_p8_p16(
12249 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
12250 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_p16(poly16x4_t a)12251 poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
12252 return vreinterpret_p8_p16(a);
12253 }
12254
12255 // CHECK-LABEL: @test_vreinterpret_p16_s8(
12256 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
12257 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_s8(int8x8_t a)12258 poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
12259 return vreinterpret_p16_s8(a);
12260 }
12261
12262 // CHECK-LABEL: @test_vreinterpret_p16_s16(
12263 // CHECK: ret <4 x i16> %a
test_vreinterpret_p16_s16(int16x4_t a)12264 poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
12265 return vreinterpret_p16_s16(a);
12266 }
12267
12268 // CHECK-LABEL: @test_vreinterpret_p16_s32(
12269 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
12270 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_s32(int32x2_t a)12271 poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
12272 return vreinterpret_p16_s32(a);
12273 }
12274
12275 // CHECK-LABEL: @test_vreinterpret_p16_s64(
12276 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
12277 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_s64(int64x1_t a)12278 poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
12279 return vreinterpret_p16_s64(a);
12280 }
12281
12282 // CHECK-LABEL: @test_vreinterpret_p16_u8(
12283 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
12284 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_u8(uint8x8_t a)12285 poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
12286 return vreinterpret_p16_u8(a);
12287 }
12288
12289 // CHECK-LABEL: @test_vreinterpret_p16_u16(
12290 // CHECK: ret <4 x i16> %a
test_vreinterpret_p16_u16(uint16x4_t a)12291 poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
12292 return vreinterpret_p16_u16(a);
12293 }
12294
12295 // CHECK-LABEL: @test_vreinterpret_p16_u32(
12296 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
12297 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_u32(uint32x2_t a)12298 poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
12299 return vreinterpret_p16_u32(a);
12300 }
12301
12302 // CHECK-LABEL: @test_vreinterpret_p16_u64(
12303 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
12304 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_u64(uint64x1_t a)12305 poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
12306 return vreinterpret_p16_u64(a);
12307 }
12308
12309 // CHECK-LABEL: @test_vreinterpret_p16_f16(
12310 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
12311 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_f16(float16x4_t a)12312 poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
12313 return vreinterpret_p16_f16(a);
12314 }
12315
12316 // CHECK-LABEL: @test_vreinterpret_p16_f32(
12317 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
12318 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_f32(float32x2_t a)12319 poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
12320 return vreinterpret_p16_f32(a);
12321 }
12322
12323 // CHECK-LABEL: @test_vreinterpret_p16_p8(
12324 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
12325 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_p8(poly8x8_t a)12326 poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
12327 return vreinterpret_p16_p8(a);
12328 }
12329
12330 // CHECK-LABEL: @test_vreinterpretq_s8_s16(
12331 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12332 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s16(int16x8_t a)12333 int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
12334 return vreinterpretq_s8_s16(a);
12335 }
12336
12337 // CHECK-LABEL: @test_vreinterpretq_s8_s32(
12338 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12339 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s32(int32x4_t a)12340 int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
12341 return vreinterpretq_s8_s32(a);
12342 }
12343
12344 // CHECK-LABEL: @test_vreinterpretq_s8_s64(
12345 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12346 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s64(int64x2_t a)12347 int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
12348 return vreinterpretq_s8_s64(a);
12349 }
12350
12351 // CHECK-LABEL: @test_vreinterpretq_s8_u8(
12352 // CHECK: ret <16 x i8> %a
test_vreinterpretq_s8_u8(uint8x16_t a)12353 int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
12354 return vreinterpretq_s8_u8(a);
12355 }
12356
12357 // CHECK-LABEL: @test_vreinterpretq_s8_u16(
12358 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12359 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u16(uint16x8_t a)12360 int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
12361 return vreinterpretq_s8_u16(a);
12362 }
12363
12364 // CHECK-LABEL: @test_vreinterpretq_s8_u32(
12365 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12366 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u32(uint32x4_t a)12367 int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
12368 return vreinterpretq_s8_u32(a);
12369 }
12370
12371 // CHECK-LABEL: @test_vreinterpretq_s8_u64(
12372 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12373 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u64(uint64x2_t a)12374 int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
12375 return vreinterpretq_s8_u64(a);
12376 }
12377
12378 // CHECK-LABEL: @test_vreinterpretq_s8_f16(
12379 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
12380 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f16(float16x8_t a)12381 int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
12382 return vreinterpretq_s8_f16(a);
12383 }
12384
12385 // CHECK-LABEL: @test_vreinterpretq_s8_f32(
12386 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
12387 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f32(float32x4_t a)12388 int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
12389 return vreinterpretq_s8_f32(a);
12390 }
12391
12392 // CHECK-LABEL: @test_vreinterpretq_s8_p8(
12393 // CHECK: ret <16 x i8> %a
test_vreinterpretq_s8_p8(poly8x16_t a)12394 int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
12395 return vreinterpretq_s8_p8(a);
12396 }
12397
12398 // CHECK-LABEL: @test_vreinterpretq_s8_p16(
12399 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12400 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p16(poly16x8_t a)12401 int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
12402 return vreinterpretq_s8_p16(a);
12403 }
12404
// Reinterpret casts to int16x8_t: one IR bitcast, or nothing when the
// source already has IR type <8 x i16>.
// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// Signed/unsigned pairs share an IR type, so this is a no-op.
// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}
12479
// Reinterpret casts to int32x4_t: one IR bitcast, or nothing when the
// source already has IR type <4 x i32>.
// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// Signed/unsigned pairs share an IR type, so this is a no-op.
// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}
12555
// Reinterpret casts to int64x2_t: one IR bitcast, or nothing when the
// source already has IR type <2 x i64>.
// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// Signed/unsigned pairs share an IR type, so this is a no-op.
// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}
12631
// Reinterpret casts to uint8x16_t: one IR bitcast, or nothing when the
// source already has IR type <16 x i8>.
// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}
12706
// Reinterpret casts to uint16x8_t: one IR bitcast, or nothing when the
// source already has IR type <8 x i16>.
// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}
12781
// Reinterpret casts to uint32x4_t: one IR bitcast, or nothing when the
// source already has IR type <4 x i32>.
// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}
12857
// Reinterpret casts to uint64x2_t: one IR bitcast, or nothing when the
// source already has IR type <2 x i64>.
// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}
12933
// Reinterpret casts to float16x8_t (<8 x half>): always a single IR bitcast
// since no integer source shares the half-float IR type.
// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}
13010
// Reinterpret casts to float32x4_t (<4 x float>): always a single IR bitcast
// since no integer source shares the float IR type.
// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}
13087
// Reinterpret casts to poly8x16_t: one IR bitcast, or nothing when the
// source already has IR type <16 x i8> (s8/u8 sources).
// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}
13162
13163 // CHECK-LABEL: @test_vreinterpretq_p16_s8(
13164 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
13165 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s8(int8x16_t a)13166 poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
13167 return vreinterpretq_p16_s8(a);
13168 }
13169
13170 // CHECK-LABEL: @test_vreinterpretq_p16_s16(
13171 // CHECK: ret <8 x i16> %a
test_vreinterpretq_p16_s16(int16x8_t a)13172 poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
13173 return vreinterpretq_p16_s16(a);
13174 }
13175
13176 // CHECK-LABEL: @test_vreinterpretq_p16_s32(
13177 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
13178 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s32(int32x4_t a)13179 poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
13180 return vreinterpretq_p16_s32(a);
13181 }
13182
13183 // CHECK-LABEL: @test_vreinterpretq_p16_s64(
13184 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
13185 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_s64(int64x2_t a)13186 poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
13187 return vreinterpretq_p16_s64(a);
13188 }
13189
13190 // CHECK-LABEL: @test_vreinterpretq_p16_u8(
13191 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
13192 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u8(uint8x16_t a)13193 poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
13194 return vreinterpretq_p16_u8(a);
13195 }
13196
13197 // CHECK-LABEL: @test_vreinterpretq_p16_u16(
13198 // CHECK: ret <8 x i16> %a
test_vreinterpretq_p16_u16(uint16x8_t a)13199 poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
13200 return vreinterpretq_p16_u16(a);
13201 }
13202
13203 // CHECK-LABEL: @test_vreinterpretq_p16_u32(
13204 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
13205 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u32(uint32x4_t a)13206 poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
13207 return vreinterpretq_p16_u32(a);
13208 }
13209
13210 // CHECK-LABEL: @test_vreinterpretq_p16_u64(
13211 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
13212 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_u64(uint64x2_t a)13213 poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
13214 return vreinterpretq_p16_u64(a);
13215 }
13216
13217 // CHECK-LABEL: @test_vreinterpretq_p16_f16(
13218 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
13219 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f16(float16x8_t a)13220 poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
13221 return vreinterpretq_p16_f16(a);
13222 }
13223
13224 // CHECK-LABEL: @test_vreinterpretq_p16_f32(
13225 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
13226 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_f32(float32x4_t a)13227 poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
13228 return vreinterpretq_p16_f32(a);
13229 }
13230
13231 // CHECK-LABEL: @test_vreinterpretq_p16_p8(
13232 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
13233 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_p8(poly8x16_t a)13234 poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
13235 return vreinterpretq_p16_p8(a);
13236 }
13237
13238 // CHECK-LABEL: @test_vrev16_s8(
13239 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13240 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev16_s8(int8x8_t a)13241 int8x8_t test_vrev16_s8(int8x8_t a) {
13242 return vrev16_s8(a);
13243 }
13244
13245 // CHECK-LABEL: @test_vrev16_u8(
13246 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13247 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev16_u8(uint8x8_t a)13248 uint8x8_t test_vrev16_u8(uint8x8_t a) {
13249 return vrev16_u8(a);
13250 }
13251
13252 // CHECK-LABEL: @test_vrev16_p8(
13253 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13254 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev16_p8(poly8x8_t a)13255 poly8x8_t test_vrev16_p8(poly8x8_t a) {
13256 return vrev16_p8(a);
13257 }
13258
13259 // CHECK-LABEL: @test_vrev16q_s8(
13260 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
13261 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev16q_s8(int8x16_t a)13262 int8x16_t test_vrev16q_s8(int8x16_t a) {
13263 return vrev16q_s8(a);
13264 }
13265
13266 // CHECK-LABEL: @test_vrev16q_u8(
13267 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
13268 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev16q_u8(uint8x16_t a)13269 uint8x16_t test_vrev16q_u8(uint8x16_t a) {
13270 return vrev16q_u8(a);
13271 }
13272
13273 // CHECK-LABEL: @test_vrev16q_p8(
13274 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
13275 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev16q_p8(poly8x16_t a)13276 poly8x16_t test_vrev16q_p8(poly8x16_t a) {
13277 return vrev16q_p8(a);
13278 }
13279
13280 // CHECK-LABEL: @test_vrev32_s8(
13281 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13282 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev32_s8(int8x8_t a)13283 int8x8_t test_vrev32_s8(int8x8_t a) {
13284 return vrev32_s8(a);
13285 }
13286
13287 // CHECK-LABEL: @test_vrev32_s16(
13288 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13289 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev32_s16(int16x4_t a)13290 int16x4_t test_vrev32_s16(int16x4_t a) {
13291 return vrev32_s16(a);
13292 }
13293
13294 // CHECK-LABEL: @test_vrev32_u8(
13295 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13296 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev32_u8(uint8x8_t a)13297 uint8x8_t test_vrev32_u8(uint8x8_t a) {
13298 return vrev32_u8(a);
13299 }
13300
13301 // CHECK-LABEL: @test_vrev32_u16(
13302 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13303 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev32_u16(uint16x4_t a)13304 uint16x4_t test_vrev32_u16(uint16x4_t a) {
13305 return vrev32_u16(a);
13306 }
13307
13308 // CHECK-LABEL: @test_vrev32_p8(
13309 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13310 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev32_p8(poly8x8_t a)13311 poly8x8_t test_vrev32_p8(poly8x8_t a) {
13312 return vrev32_p8(a);
13313 }
13314
13315 // CHECK-LABEL: @test_vrev32_p16(
13316 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13317 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev32_p16(poly16x4_t a)13318 poly16x4_t test_vrev32_p16(poly16x4_t a) {
13319 return vrev32_p16(a);
13320 }
13321
13322 // CHECK-LABEL: @test_vrev32q_s8(
13323 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
13324 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev32q_s8(int8x16_t a)13325 int8x16_t test_vrev32q_s8(int8x16_t a) {
13326 return vrev32q_s8(a);
13327 }
13328
13329 // CHECK-LABEL: @test_vrev32q_s16(
13330 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13331 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev32q_s16(int16x8_t a)13332 int16x8_t test_vrev32q_s16(int16x8_t a) {
13333 return vrev32q_s16(a);
13334 }
13335
13336 // CHECK-LABEL: @test_vrev32q_u8(
13337 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
13338 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev32q_u8(uint8x16_t a)13339 uint8x16_t test_vrev32q_u8(uint8x16_t a) {
13340 return vrev32q_u8(a);
13341 }
13342
13343 // CHECK-LABEL: @test_vrev32q_u16(
13344 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13345 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev32q_u16(uint16x8_t a)13346 uint16x8_t test_vrev32q_u16(uint16x8_t a) {
13347 return vrev32q_u16(a);
13348 }
13349
13350 // CHECK-LABEL: @test_vrev32q_p8(
13351 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
13352 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev32q_p8(poly8x16_t a)13353 poly8x16_t test_vrev32q_p8(poly8x16_t a) {
13354 return vrev32q_p8(a);
13355 }
13356
13357 // CHECK-LABEL: @test_vrev32q_p16(
13358 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13359 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev32q_p16(poly16x8_t a)13360 poly16x8_t test_vrev32q_p16(poly16x8_t a) {
13361 return vrev32q_p16(a);
13362 }
13363
13364 // CHECK-LABEL: @test_vrev64_s8(
13365 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
13366 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev64_s8(int8x8_t a)13367 int8x8_t test_vrev64_s8(int8x8_t a) {
13368 return vrev64_s8(a);
13369 }
13370
13371 // CHECK-LABEL: @test_vrev64_s16(
13372 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13373 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev64_s16(int16x4_t a)13374 int16x4_t test_vrev64_s16(int16x4_t a) {
13375 return vrev64_s16(a);
13376 }
13377
13378 // CHECK-LABEL: @test_vrev64_s32(
13379 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
13380 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
test_vrev64_s32(int32x2_t a)13381 int32x2_t test_vrev64_s32(int32x2_t a) {
13382 return vrev64_s32(a);
13383 }
13384
13385 // CHECK-LABEL: @test_vrev64_u8(
13386 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
13387 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev64_u8(uint8x8_t a)13388 uint8x8_t test_vrev64_u8(uint8x8_t a) {
13389 return vrev64_u8(a);
13390 }
13391
13392 // CHECK-LABEL: @test_vrev64_u16(
13393 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13394 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev64_u16(uint16x4_t a)13395 uint16x4_t test_vrev64_u16(uint16x4_t a) {
13396 return vrev64_u16(a);
13397 }
13398
13399 // CHECK-LABEL: @test_vrev64_u32(
13400 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
13401 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
test_vrev64_u32(uint32x2_t a)13402 uint32x2_t test_vrev64_u32(uint32x2_t a) {
13403 return vrev64_u32(a);
13404 }
13405
13406 // CHECK-LABEL: @test_vrev64_p8(
13407 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
13408 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev64_p8(poly8x8_t a)13409 poly8x8_t test_vrev64_p8(poly8x8_t a) {
13410 return vrev64_p8(a);
13411 }
13412
13413 // CHECK-LABEL: @test_vrev64_p16(
13414 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13415 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev64_p16(poly16x4_t a)13416 poly16x4_t test_vrev64_p16(poly16x4_t a) {
13417 return vrev64_p16(a);
13418 }
13419
13420 // CHECK-LABEL: @test_vrev64_f32(
13421 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
13422 // CHECK: ret <2 x float> [[SHUFFLE_I]]
test_vrev64_f32(float32x2_t a)13423 float32x2_t test_vrev64_f32(float32x2_t a) {
13424 return vrev64_f32(a);
13425 }
13426
13427 // CHECK-LABEL: @test_vrev64q_s8(
13428 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
13429 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev64q_s8(int8x16_t a)13430 int8x16_t test_vrev64q_s8(int8x16_t a) {
13431 return vrev64q_s8(a);
13432 }
13433
13434 // CHECK-LABEL: @test_vrev64q_s16(
13435 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13436 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev64q_s16(int16x8_t a)13437 int16x8_t test_vrev64q_s16(int16x8_t a) {
13438 return vrev64q_s16(a);
13439 }
13440
13441 // CHECK-LABEL: @test_vrev64q_s32(
13442 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13443 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vrev64q_s32(int32x4_t a)13444 int32x4_t test_vrev64q_s32(int32x4_t a) {
13445 return vrev64q_s32(a);
13446 }
13447
13448 // CHECK-LABEL: @test_vrev64q_u8(
13449 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
13450 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev64q_u8(uint8x16_t a)13451 uint8x16_t test_vrev64q_u8(uint8x16_t a) {
13452 return vrev64q_u8(a);
13453 }
13454
13455 // CHECK-LABEL: @test_vrev64q_u16(
13456 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13457 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev64q_u16(uint16x8_t a)13458 uint16x8_t test_vrev64q_u16(uint16x8_t a) {
13459 return vrev64q_u16(a);
13460 }
13461
13462 // CHECK-LABEL: @test_vrev64q_u32(
13463 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13464 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vrev64q_u32(uint32x4_t a)13465 uint32x4_t test_vrev64q_u32(uint32x4_t a) {
13466 return vrev64q_u32(a);
13467 }
13468
13469 // CHECK-LABEL: @test_vrev64q_p8(
13470 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
13471 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev64q_p8(poly8x16_t a)13472 poly8x16_t test_vrev64q_p8(poly8x16_t a) {
13473 return vrev64q_p8(a);
13474 }
13475
13476 // CHECK-LABEL: @test_vrev64q_p16(
13477 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13478 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev64q_p16(poly16x8_t a)13479 poly16x8_t test_vrev64q_p16(poly16x8_t a) {
13480 return vrev64q_p16(a);
13481 }
13482
13483 // CHECK-LABEL: @test_vrev64q_f32(
13484 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13485 // CHECK: ret <4 x float> [[SHUFFLE_I]]
test_vrev64q_f32(float32x4_t a)13486 float32x4_t test_vrev64q_f32(float32x4_t a) {
13487 return vrev64q_f32(a);
13488 }
13489
13490 // CHECK-LABEL: @test_vrhadd_s8(
13491 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
13492 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
test_vrhadd_s8(int8x8_t a,int8x8_t b)13493 int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
13494 return vrhadd_s8(a, b);
13495 }
13496
13497 // CHECK-LABEL: @test_vrhadd_s16(
13498 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13499 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13500 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
13501 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
13502 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
test_vrhadd_s16(int16x4_t a,int16x4_t b)13503 int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
13504 return vrhadd_s16(a, b);
13505 }
13506
13507 // CHECK-LABEL: @test_vrhadd_s32(
13508 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13509 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13510 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
13511 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
13512 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
test_vrhadd_s32(int32x2_t a,int32x2_t b)13513 int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
13514 return vrhadd_s32(a, b);
13515 }
13516
13517 // CHECK-LABEL: @test_vrhadd_u8(
13518 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
13519 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
test_vrhadd_u8(uint8x8_t a,uint8x8_t b)13520 uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
13521 return vrhadd_u8(a, b);
13522 }
13523
13524 // CHECK-LABEL: @test_vrhadd_u16(
13525 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13526 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13527 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
13528 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
13529 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
test_vrhadd_u16(uint16x4_t a,uint16x4_t b)13530 uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
13531 return vrhadd_u16(a, b);
13532 }
13533
13534 // CHECK-LABEL: @test_vrhadd_u32(
13535 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13536 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13537 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
13538 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
13539 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
test_vrhadd_u32(uint32x2_t a,uint32x2_t b)13540 uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
13541 return vrhadd_u32(a, b);
13542 }
13543
13544 // CHECK-LABEL: @test_vrhaddq_s8(
13545 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
13546 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_s8(int8x16_t a,int8x16_t b)13547 int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
13548 return vrhaddq_s8(a, b);
13549 }
13550
13551 // CHECK-LABEL: @test_vrhaddq_s16(
13552 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13553 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13554 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
13555 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
13556 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_s16(int16x8_t a,int16x8_t b)13557 int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
13558 return vrhaddq_s16(a, b);
13559 }
13560
13561 // CHECK-LABEL: @test_vrhaddq_s32(
13562 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13563 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13564 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
13565 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
13566 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_s32(int32x4_t a,int32x4_t b)13567 int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
13568 return vrhaddq_s32(a, b);
13569 }
13570
13571 // CHECK-LABEL: @test_vrhaddq_u8(
13572 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
13573 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_u8(uint8x16_t a,uint8x16_t b)13574 uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
13575 return vrhaddq_u8(a, b);
13576 }
13577
13578 // CHECK-LABEL: @test_vrhaddq_u16(
13579 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13580 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13581 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
13582 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
13583 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_u16(uint16x8_t a,uint16x8_t b)13584 uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
13585 return vrhaddq_u16(a, b);
13586 }
13587
13588 // CHECK-LABEL: @test_vrhaddq_u32(
13589 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13590 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13591 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
13592 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
13593 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_u32(uint32x4_t a,uint32x4_t b)13594 uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
13595 return vrhaddq_u32(a, b);
13596 }
13597
13598 // CHECK-LABEL: @test_vrshl_s8(
13599 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
13600 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
test_vrshl_s8(int8x8_t a,int8x8_t b)13601 int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
13602 return vrshl_s8(a, b);
13603 }
13604
13605 // CHECK-LABEL: @test_vrshl_s16(
13606 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13607 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13608 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
13609 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
13610 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
test_vrshl_s16(int16x4_t a,int16x4_t b)13611 int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
13612 return vrshl_s16(a, b);
13613 }
13614
13615 // CHECK-LABEL: @test_vrshl_s32(
13616 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13617 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13618 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
13619 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
13620 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
test_vrshl_s32(int32x2_t a,int32x2_t b)13621 int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
13622 return vrshl_s32(a, b);
13623 }
13624
13625 // CHECK-LABEL: @test_vrshl_s64(
13626 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13627 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13628 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
13629 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
13630 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
test_vrshl_s64(int64x1_t a,int64x1_t b)13631 int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
13632 return vrshl_s64(a, b);
13633 }
13634
13635 // CHECK-LABEL: @test_vrshl_u8(
13636 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
13637 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
test_vrshl_u8(uint8x8_t a,int8x8_t b)13638 uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
13639 return vrshl_u8(a, b);
13640 }
13641
13642 // CHECK-LABEL: @test_vrshl_u16(
13643 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13644 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13645 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
13646 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
13647 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
test_vrshl_u16(uint16x4_t a,int16x4_t b)13648 uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
13649 return vrshl_u16(a, b);
13650 }
13651
13652 // CHECK-LABEL: @test_vrshl_u32(
13653 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13654 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13655 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
13656 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
13657 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
test_vrshl_u32(uint32x2_t a,int32x2_t b)13658 uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
13659 return vrshl_u32(a, b);
13660 }
13661
13662 // CHECK-LABEL: @test_vrshl_u64(
13663 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13664 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13665 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
13666 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
13667 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
test_vrshl_u64(uint64x1_t a,int64x1_t b)13668 uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
13669 return vrshl_u64(a, b);
13670 }
13671
13672 // CHECK-LABEL: @test_vrshlq_s8(
13673 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
13674 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
test_vrshlq_s8(int8x16_t a,int8x16_t b)13675 int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
13676 return vrshlq_s8(a, b);
13677 }
13678
13679 // CHECK-LABEL: @test_vrshlq_s16(
13680 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13681 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13682 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
13683 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
13684 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
test_vrshlq_s16(int16x8_t a,int16x8_t b)13685 int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
13686 return vrshlq_s16(a, b);
13687 }
13688
13689 // CHECK-LABEL: @test_vrshlq_s32(
13690 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13691 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13692 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
13693 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
13694 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
test_vrshlq_s32(int32x4_t a,int32x4_t b)13695 int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
13696 return vrshlq_s32(a, b);
13697 }
13698
13699 // CHECK-LABEL: @test_vrshlq_s64(
13700 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13701 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
13702 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
13703 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
13704 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
test_vrshlq_s64(int64x2_t a,int64x2_t b)13705 int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
13706 return vrshlq_s64(a, b);
13707 }
13708
13709 // CHECK-LABEL: @test_vrshlq_u8(
13710 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
13711 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
test_vrshlq_u8(uint8x16_t a,int8x16_t b)13712 uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
13713 return vrshlq_u8(a, b);
13714 }
13715
13716 // CHECK-LABEL: @test_vrshlq_u16(
13717 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13718 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13719 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
13720 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
13721 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
test_vrshlq_u16(uint16x8_t a,int16x8_t b)13722 uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
13723 return vrshlq_u16(a, b);
13724 }
13725
13726 // CHECK-LABEL: @test_vrshlq_u32(
13727 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13728 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13729 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
13730 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
13731 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
test_vrshlq_u32(uint32x4_t a,int32x4_t b)13732 uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
13733 return vrshlq_u32(a, b);
13734 }
13735
13736 // CHECK-LABEL: @test_vrshlq_u64(
13737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13738 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
13739 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
13740 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
13741 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
test_vrshlq_u64(uint64x2_t a,int64x2_t b)13742 uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
13743 return vrshlq_u64(a, b);
13744 }
13745
// vrshrn_n_* tests: rounding shift right and narrow by a constant.
// Each stub returns the intrinsic applied to its argument with shift count 1;
// the CHECK lines pin the generated IR. NOTE(review): the LLVM vrshiftn
// intrinsic encodes a right shift as a negative shift-amount vector, which is
// why a shift of 1 appears as a splat of -1 in the CHECK lines below.
// The CHECK lines are autogenerated — do not hand-edit them.
// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 1);
}
13799
// vrshr_n_* / vrshrq_n_* tests: rounding shift right by a constant, on 64-bit
// (d-register) and 128-bit (q-register) vectors respectively. Signed variants
// lower to @llvm.arm.neon.vrshifts, unsigned to vrshiftu; the shift count 1 is
// encoded as a splat of -1 (the intrinsic takes negated amounts for right
// shifts). The CHECK lines are autogenerated — do not hand-edit them.
// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 1);
}
13935
// vrsqrte_* / vrsqrts_* tests: reciprocal square-root estimate and the
// Newton-Raphson refinement step, lowering to @llvm.arm.neon.vrsqrte and
// @llvm.arm.neon.vrsqrts. Some CHECK'd bitcasts (e.g. [[VRSQRTS_V3_I]]) are
// dead in the mem2reg'd output — the ret uses the pre-bitcast value; this
// matches what the generator emitted. Do not hand-edit the CHECK lines.
// CHECK-LABEL: @test_vrsqrte_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRSQRTE_V1_I]]
float32x2_t test_vrsqrte_f32(float32x2_t a) {
  return vrsqrte_f32(a);
}

// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]]
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
  return vrsqrteq_f32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
  return vrsqrts_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
  return vrsqrtsq_f32(a, b);
}
13987
// vrsra_n_* / vrsraq_n_* tests: rounding shift right by a constant and
// accumulate (a + (b rounding>> n)). The shift lowers to vrshifts/vrshiftu
// with the count encoded as a splat of -1 (right shifts use negated amounts),
// followed by a plain IR add. The CHECK lines are autogenerated — do not
// hand-edit them.
// CHECK-LABEL: @test_vrsra_n_s8(
// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK: ret <8 x i8> [[VRSRA_N]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i16> [[VRSRA_N]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i32> [[VRSRA_N]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <1 x i64> [[VRSRA_N]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u8(
// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK: ret <8 x i8> [[VRSRA_N]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i16> [[VRSRA_N]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i32> [[VRSRA_N]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <1 x i64> [[VRSRA_N]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s8(
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <8 x i16> [[VRSRA_N]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i32> [[VRSRA_N]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u8(
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <8 x i16> [[VRSRA_N]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i32> [[VRSRA_N]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 1);
}
14163
// vrsubhn_* tests: rounding subtract and narrow to the high half, lowering to
// @llvm.arm.neon.vrsubhn. Signed and unsigned variants share the same
// intrinsic (the operation is sign-agnostic at the IR level). The CHECK lines
// are autogenerated — do not hand-edit them.
// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
14221
// vset_lane_* tests: insert a scalar into one lane of a 64-bit vector,
// lowering to a plain IR insertelement with the highest valid lane index.
// The f16 variant takes float16_t by pointer (fp16 scalar arguments are
// awkward under this ABI) and round-trips through i16 via stack temporaries,
// which is why its CHECK sequence is much longer. The CHECK lines are
// autogenerated — do not hand-edit them.
// CHECK-LABEL: @test_vset_lane_u8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_u16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_u32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_s8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_s16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_s32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_p8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_p16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_f32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_f16(
// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1
// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK: ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 1);
}
14304
// vsetq_lane_* tests: the 128-bit (q-register) counterparts of vset_lane_*,
// again lowering to a single IR insertelement. Lane indices are the maximum
// valid lane for each element type, except f16 which uses lane 3 and goes
// through the same pointer-argument / i16 reinterpret dance as the d-register
// variant. The CHECK lines are autogenerated — do not hand-edit them.
// CHECK-LABEL: @test_vsetq_lane_u8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_u16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_u32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_s8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_s16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_s32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_p8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_p16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_f32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_f16(
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3
// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK: ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 3);
}
14387
14388 // CHECK-LABEL: @test_vset_lane_s64(
14389 // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
14390 // CHECK: ret <1 x i64> [[VSET_LANE]]
test_vset_lane_s64(int64_t a,int64x1_t b)14391 int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
14392 return vset_lane_s64(a, b, 0);
14393 }
14394
14395 // CHECK-LABEL: @test_vset_lane_u64(
14396 // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
14397 // CHECK: ret <1 x i64> [[VSET_LANE]]
test_vset_lane_u64(uint64_t a,uint64x1_t b)14398 uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
14399 return vset_lane_u64(a, b, 0);
14400 }
14401
14402 // CHECK-LABEL: @test_vsetq_lane_s64(
14403 // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
14404 // CHECK: ret <2 x i64> [[VSET_LANE]]
test_vsetq_lane_s64(int64_t a,int64x2_t b)14405 int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
14406 return vsetq_lane_s64(a, b, 1);
14407 }
14408
14409 // CHECK-LABEL: @test_vsetq_lane_u64(
14410 // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
14411 // CHECK: ret <2 x i64> [[VSET_LANE]]
test_vsetq_lane_u64(uint64_t a,uint64x2_t b)14412 uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
14413 return vsetq_lane_u64(a, b, 1);
14414 }
14415
// vshl / vshlq tests: shift-by-register. Signed variants lower to
// @llvm.arm.neon.vshifts.*, unsigned to @llvm.arm.neon.vshiftu.*; the shift
// amount vector is always signed (int*x*_t) even for the unsigned variants.
// The extra bitcast CHECK lines reflect the <N x i8> argument packaging the
// NEON builtins use for non-i8 element types.
// NOTE(review): generated FileCheck test — do not hand-edit CHECK patterns.
// CHECK-LABEL: @test_vshl_s8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// 128-bit (quad) variants of the same shift-by-register intrinsics.
// CHECK-LABEL: @test_vshlq_s8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
14563
// vshll_n tests: widening shift-left by immediate. Lowered as a plain
// sext (signed) / zext (unsigned) to the double-width element type followed
// by an IR `shl` with a splat-constant shift amount — no NEON intrinsic call.
// NOTE(review): generated FileCheck test — do not hand-edit CHECK patterns.
// CHECK-LABEL: @test_vshll_n_s8(
// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 1);
}
14619
// vshl_n / vshlq_n tests: shift-left by immediate, same element width.
// Lowered to a plain IR `shl` with a splat constant; signed and unsigned
// variants emit identical IR since left shift is sign-agnostic.
// NOTE(review): generated FileCheck test — do not hand-edit CHECK patterns.
// CHECK-LABEL: @test_vshl_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// 128-bit (quad) variants.
// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 1);
}
14755
// vshrn_n tests: narrowing shift-right by immediate. Lowered to an IR
// ashr (signed) / lshr (unsigned) followed by a trunc to the half-width
// element type — no NEON intrinsic call.
// NOTE(review): generated FileCheck test — do not hand-edit CHECK patterns.
// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 1);
}
14815
// vshr_n / vshrq_n tests: shift-right by immediate, same element width.
// Signed variants lower to an IR `ashr`, unsigned variants to `lshr`,
// each with a splat-constant shift amount — no NEON intrinsic call.
// NOTE(review): generated FileCheck test — do not hand-edit CHECK patterns.
// CHECK-LABEL: @test_vshr_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// 128-bit (quad) variants.
// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 1);
}
14951
// vsli_n tests: shift-left-and-insert by immediate. All element types and
// signednesses lower to the same @llvm.arm.neon.vshiftins.* intrinsic with
// the shift count passed as a splat-constant third vector operand.
// NOTE(review): generated FileCheck test — do not hand-edit CHECK patterns.
// CHECK-LABEL: @test_vsli_n_s8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_p8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 1);
}
15038
15039 // CHECK-LABEL: @test_vsli_n_p16(
15040 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15041 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15042 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
15043 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15044 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
15045 // CHECK: ret <4 x i16> [[VSLI_N2]]
test_vsli_n_p16(poly16x4_t a,poly16x4_t b)15046 poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
15047 return vsli_n_p16(a, b, 1);
15048 }

// vsliq_n_*: 128-bit (quad) shift-left-and-insert tests; same lowering as
// vsli_n_* but on the v16i8/v8i16/v4i32/v2i64 intrinsic variants.
// CHECK-LABEL: @test_vsliq_n_s8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_p8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 1);
}

// vsra_n_*: shift-right-and-accumulate tests; codegen is a plain IR ashr
// (signed) or lshr (unsigned) followed by an add — no intrinsic call.
// CHECK-LABEL: @test_vsra_n_s8(
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u8(
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// vsraq_n_*: 128-bit (quad) shift-right-and-accumulate tests; same
// ashr/lshr + add lowering as vsra_n_* on the full-width vector types.
// CHECK-LABEL: @test_vsraq_n_s8(
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u8(
// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 1);
}

// vsri_n_*: shift-right-and-insert tests; reuses @llvm.arm.neon.vshiftins
// with a NEGATIVE splat shift amount (-1) to encode the right shift.
// CHECK-LABEL: @test_vsri_n_s8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_p8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 1);
}

// vsriq_n_*: 128-bit (quad) shift-right-and-insert tests; same
// vshiftins-with-negative-amount lowering as vsri_n_*.
// CHECK-LABEL: @test_vsriq_n_s8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_p8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 1);
}
15519
15520 // CHECK-LABEL: @test_vst1q_u8(
15521 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
15522 // CHECK: ret void
test_vst1q_u8(uint8_t * a,uint8x16_t b)15523 void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
15524 vst1q_u8(a, b);
15525 }
15526
15527 // CHECK-LABEL: @test_vst1q_u16(
15528 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15529 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15530 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15531 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
15532 // CHECK: ret void
test_vst1q_u16(uint16_t * a,uint16x8_t b)15533 void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
15534 vst1q_u16(a, b);
15535 }
15536
15537 // CHECK-LABEL: @test_vst1q_u32(
15538 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15539 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15540 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15541 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
15542 // CHECK: ret void
test_vst1q_u32(uint32_t * a,uint32x4_t b)15543 void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
15544 vst1q_u32(a, b);
15545 }
15546
15547 // CHECK-LABEL: @test_vst1q_u64(
15548 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15549 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15550 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15551 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
15552 // CHECK: ret void
test_vst1q_u64(uint64_t * a,uint64x2_t b)15553 void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
15554 vst1q_u64(a, b);
15555 }
15556
// CHECK-LABEL: @test_vst1q_s8(
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
// vst1q_s8: same IR as the u8 variant — lane signedness does not change the store.
void test_vst1q_s8(int8_t * a, int8x16_t b) {
  vst1q_s8(a, b);
}
15563
// CHECK-LABEL: @test_vst1q_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
// vst1q_s16: 8 x s16 store, alignment 2; identical IR shape to vst1q_u16.
void test_vst1q_s16(int16_t * a, int16x8_t b) {
  vst1q_s16(a, b);
}
15573
// CHECK-LABEL: @test_vst1q_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK: ret void
// vst1q_s32: 4 x s32 store, alignment 4.
void test_vst1q_s32(int32_t * a, int32x4_t b) {
  vst1q_s32(a, b);
}
15583
// CHECK-LABEL: @test_vst1q_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK: ret void
// vst1q_s64: 2 x s64 store; alignment 4 as for the u64 variant (apcs-gnu).
void test_vst1q_s64(int64_t * a, int64x2_t b) {
  vst1q_s64(a, b);
}
15593
// CHECK-LABEL: @test_vst1q_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* [[TMP0]], <8 x half> [[TMP2]], i32 2)
// CHECK: ret void
// vst1q_f16: half-precision lanes map to <8 x half> (requires +fullfp16 from the RUN line).
void test_vst1q_f16(float16_t * a, float16x8_t b) {
  vst1q_f16(a, b);
}
15603
// CHECK-LABEL: @test_vst1q_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* [[TMP0]], <4 x float> [[TMP2]], i32 4)
// CHECK: ret void
// vst1q_f32: 4 x f32 store, alignment 4.
void test_vst1q_f32(float32_t * a, float32x4_t b) {
  vst1q_f32(a, b);
}
15613
// CHECK-LABEL: @test_vst1q_p8(
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
// vst1q_p8: poly8 lanes lower to the same <16 x i8> store as u8/s8.
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
  vst1q_p8(a, b);
}
15620
// CHECK-LABEL: @test_vst1q_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
// vst1q_p16: poly16 lanes lower to the same <8 x i16> store as u16/s16.
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
  vst1q_p16(a, b);
}
15630
// CHECK-LABEL: @test_vst1_u8(
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
// vst1_u8: D-register (64-bit) variant — 8 u8 lanes, alignment 1.
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
  vst1_u8(a, b);
}
15637
// CHECK-LABEL: @test_vst1_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
// vst1_u16: 4 x u16 store; round-trips through <8 x i8>, alignment 2.
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
  vst1_u16(a, b);
}
15647
// CHECK-LABEL: @test_vst1_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
// vst1_u32: 2 x u32 store, alignment 4.
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
  vst1_u32(a, b);
}
15657
// CHECK-LABEL: @test_vst1_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
// vst1_u64: single-lane <1 x i64> store; alignment 4 (apcs-gnu).
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
  vst1_u64(a, b);
}
15667
// CHECK-LABEL: @test_vst1_s8(
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
// vst1_s8: identical IR to vst1_u8 — signedness does not affect the store.
void test_vst1_s8(int8_t * a, int8x8_t b) {
  vst1_s8(a, b);
}
15674
// CHECK-LABEL: @test_vst1_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
// vst1_s16: 4 x s16 store, alignment 2.
void test_vst1_s16(int16_t * a, int16x4_t b) {
  vst1_s16(a, b);
}
15684
// CHECK-LABEL: @test_vst1_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
// vst1_s32: 2 x s32 store, alignment 4.
void test_vst1_s32(int32_t * a, int32x2_t b) {
  vst1_s32(a, b);
}
15694
// CHECK-LABEL: @test_vst1_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
// vst1_s64: single-lane <1 x i64> store, alignment 4.
void test_vst1_s64(int64_t * a, int64x1_t b) {
  vst1_s64(a, b);
}
15704
// CHECK-LABEL: @test_vst1_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* [[TMP0]], <4 x half> [[TMP2]], i32 2)
// CHECK: ret void
// vst1_f16: 4 x half store (fullfp16), alignment 2.
void test_vst1_f16(float16_t * a, float16x4_t b) {
  vst1_f16(a, b);
}
15714
// CHECK-LABEL: @test_vst1_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* [[TMP0]], <2 x float> [[TMP2]], i32 4)
// CHECK: ret void
// vst1_f32: 2 x f32 store, alignment 4.
void test_vst1_f32(float32_t * a, float32x2_t b) {
  vst1_f32(a, b);
}
15724
// CHECK-LABEL: @test_vst1_p8(
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
// vst1_p8: poly8 lowers to the same <8 x i8> store as u8/s8.
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
  vst1_p8(a, b);
}
15731
// CHECK-LABEL: @test_vst1_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
// vst1_p16: poly16 lowers to the same <4 x i16> store as u16/s16.
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
  vst1_p16(a, b);
}
15741
// CHECK-LABEL: @test_vst1q_lane_u8(
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
// Lane store of the highest lane (15): lowered to extractelement + plain scalar
// store rather than a NEON intrinsic.
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
  vst1q_lane_u8(a, b, 15);
}
15749
// CHECK-LABEL: @test_vst1q_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
// Lane 7 (highest) of a u16 Q-register: extractelement + scalar i16 store.
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
  vst1q_lane_u16(a, b, 7);
}
15761
// CHECK-LABEL: @test_vst1q_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
// Lane 3 (highest) of a u32 Q-register: extractelement + scalar i32 store.
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
  vst1q_lane_u32(a, b, 3);
}
15773
// CHECK-LABEL: @test_vst1q_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: [[TMP4:%.*]] = shufflevector
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
// 64-bit lane store is special: lane 1 is extracted with a shufflevector into a
// <1 x i64> and stored through the vst1 intrinsic, not a scalar store.
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
  vst1q_lane_u64(a, b, 1);
}
15784
// CHECK-LABEL: @test_vst1q_lane_s8(
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
// Same lowering as the u8 lane store: extractelement + scalar i8 store.
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
  vst1q_lane_s8(a, b, 15);
}
15792
// CHECK-LABEL: @test_vst1q_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
// Lane 7 of an s16 Q-register: extractelement + scalar i16 store.
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
  vst1q_lane_s16(a, b, 7);
}
15804
// CHECK-LABEL: @test_vst1q_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
// Lane 3 of an s32 Q-register: extractelement + scalar i32 store.
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
  vst1q_lane_s32(a, b, 3);
}
15816
// CHECK-LABEL: @test_vst1q_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
// s64 lane store: shufflevector extracts lane 1 into <1 x i64>, stored via vst1.
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
  vst1q_lane_s64(a, b, 1);
}
15827
// CHECK-LABEL: @test_vst1q_lane_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK: store half [[TMP3]], half* [[TMP4]], align 2
// CHECK: ret void
// Lane 7 of an f16 Q-register: extractelement + scalar half store.
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
  vst1q_lane_f16(a, b, 7);
}
15839
// CHECK-LABEL: @test_vst1q_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: store float [[TMP3]], float* [[TMP4]], align 4
// CHECK: ret void
// Lane 3 of an f32 Q-register: extractelement + scalar float store.
void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
  vst1q_lane_f32(a, b, 3);
}
15851
// CHECK-LABEL: @test_vst1q_lane_p8(
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
// poly8 lane store lowers the same way as u8/s8.
void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
  vst1q_lane_p8(a, b, 15);
}
15859
// CHECK-LABEL: @test_vst1q_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
// poly16 lane store lowers the same way as u16/s16.
void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
  vst1q_lane_p16(a, b, 7);
}
15871
// CHECK-LABEL: @test_vst1_lane_u8(
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
// D-register lane store, highest lane (7): extractelement + scalar i8 store.
void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
  vst1_lane_u8(a, b, 7);
}
15879
// CHECK-LABEL: @test_vst1_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
// Lane 3 (highest) of a u16 D-register: extractelement + scalar i16 store.
void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
  vst1_lane_u16(a, b, 3);
}
15891
// CHECK-LABEL: @test_vst1_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
// Lane 1 (highest) of a u32 D-register: extractelement + scalar i32 store.
void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
  vst1_lane_u32(a, b, 1);
}
15903
// CHECK-LABEL: @test_vst1_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK: ret void
// Lane 0 of the single-lane u64 D-register: plain scalar i64 store (align 4),
// unlike the Q-register u64 lane store which goes through the vst1 intrinsic.
void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
  vst1_lane_u64(a, b, 0);
}
15915
// CHECK-LABEL: @test_vst1_lane_s8(
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
// Same lowering as vst1_lane_u8.
void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
  vst1_lane_s8(a, b, 7);
}
15923
// CHECK-LABEL: @test_vst1_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
// Lane 3 of an s16 D-register: extractelement + scalar i16 store.
void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
  vst1_lane_s16(a, b, 3);
}
15935
// CHECK-LABEL: @test_vst1_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
// Lane 1 of an s32 D-register: extractelement + scalar i32 store.
void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
  vst1_lane_s32(a, b, 1);
}
15947
// CHECK-LABEL: @test_vst1_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK: ret void
// Lane 0 of the single-lane s64 D-register: scalar i64 store, align 4.
void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
  vst1_lane_s64(a, b, 0);
}
15959
// CHECK-LABEL: @test_vst1_lane_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK: store half [[TMP3]], half* [[TMP4]], align 2
// CHECK: ret void
// Lane 3 of an f16 D-register: extractelement + scalar half store.
void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
  vst1_lane_f16(a, b, 3);
}
15971
// CHECK-LABEL: @test_vst1_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: store float [[TMP3]], float* [[TMP4]], align 4
// CHECK: ret void
// Lane 1 of an f32 D-register: extractelement + scalar float store.
void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
  vst1_lane_f32(a, b, 1);
}
15983
// CHECK-LABEL: @test_vst1_lane_p8(
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
// poly8 D-register lane store: same lowering as u8/s8.
void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
  vst1_lane_p8(a, b, 7);
}
15991
// CHECK-LABEL: @test_vst1_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
// poly16 D-register lane store: same lowering as u16/s16.
void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
  vst1_lane_p16(a, b, 3);
}
16003
// CHECK-LABEL: @test_vst2q_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
// Two-register interleaved store. The by-value uint8x16x2_t argument arrives
// coerced as [4 x i64]; clang stores it into an alloca, memcpy's it into the
// intrinsic's local copy (__s1), loads both 16-byte registers, and calls vst2.
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}
16024
// CHECK-LABEL: @test_vst2q_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
// Same coerce/memcpy pattern as vst2q_u8, plus the <16 x i8> round-trip
// bitcasts on each <8 x i16> register before the vst2 call (alignment 2).
void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}
16050
// CHECK-LABEL: @test_vst2q_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK: ret void
// u32 two-register interleaved store: coerce/memcpy of the struct argument,
// bitcast round-trips on each <4 x i32> register, vst2 with alignment 4.
void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}
16076
// CHECK-LABEL: @test_vst2q_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
// s8 variant: identical IR shape to vst2q_u8 (no element bitcasts needed
// because the element type is already i8).
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
  vst2q_s8(a, b);
}
16097
// CHECK-LABEL: @test_vst2q_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
// s16 two-register interleaved store: same shape as vst2q_u16 (alignment 2).
void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
  vst2q_s16(a, b);
}
16123
16124 // CHECK-LABEL: @test_vst2q_s32(
16125 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
16126 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
16127 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
16128 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
16129 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16130 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
16131 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
16132 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16133 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16134 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16135 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
16136 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16137 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16138 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16139 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16140 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16141 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16142 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16143 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16144 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
16145 // CHECK: ret void
// Verifies vst2q_s32 lowers to @llvm.arm.neon.vst2.p0i8.v4i32 (align 4);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
  vst2q_s32(a, b);
}
16149
16150 // CHECK-LABEL: @test_vst2q_f16(
16151 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
16152 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
16153 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
16154 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
16155 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16156 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
16157 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
16158 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16159 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16160 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16161 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
16162 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
16163 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
16164 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16165 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
16166 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
16167 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
16168 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
16169 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
16170 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 2)
16171 // CHECK: ret void
// Verifies vst2q_f16 lowers to @llvm.arm.neon.vst2.p0i8.v8f16 (align 2);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
  vst2q_f16(a, b);
}
16175
16176 // CHECK-LABEL: @test_vst2q_f32(
16177 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
16178 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
16179 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
16180 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
16181 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16182 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
16183 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
16184 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16185 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16186 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16187 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
16188 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
16189 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
16190 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16191 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
16192 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
16193 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
16194 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
16195 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
16196 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
16197 // CHECK: ret void
// Verifies vst2q_f32 lowers to @llvm.arm.neon.vst2.p0i8.v4f32 (align 4);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
  vst2q_f32(a, b);
}
16201
16202 // CHECK-LABEL: @test_vst2q_p8(
16203 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
16204 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
16205 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
16206 // CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
16207 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16208 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
16209 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
16210 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16211 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
16212 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
16213 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16214 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
16215 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
16216 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16217 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
16218 // CHECK: ret void
// Verifies vst2q_p8 lowers to @llvm.arm.neon.vst2.p0i8.v16i8 (align 1, no
// pointer/vector bitcasts needed for the i8 element type); expectations are
// the autogenerated CHECK lines above — do not hand-edit.
void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}
16222
16223 // CHECK-LABEL: @test_vst2q_p16(
16224 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
16225 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
16226 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
16227 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16228 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16229 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
16230 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
16231 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16232 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16233 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16234 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16235 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16236 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16237 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16238 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16239 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16240 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16241 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16242 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16243 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
16244 // CHECK: ret void
// Verifies vst2q_p16 lowers to @llvm.arm.neon.vst2.p0i8.v8i16 (align 2);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}
16248
16249 // CHECK-LABEL: @test_vst2_u8(
16250 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
16251 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
16252 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
16253 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16254 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16255 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
16256 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
16257 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16258 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16259 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16260 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16261 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16262 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16263 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16264 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16265 // CHECK: ret void
// Verifies vst2_u8 lowers to @llvm.arm.neon.vst2.p0i8.v8i8 (align 1, no
// bitcasts for the i8 element type); expectations are the autogenerated
// CHECK lines above — do not hand-edit.
void test_vst2_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
16269
16270 // CHECK-LABEL: @test_vst2_u16(
16271 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
16272 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
16273 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
16274 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16275 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16276 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
16277 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
16278 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16279 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16280 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16281 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16282 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16283 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16284 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16285 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16286 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16287 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16288 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16289 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16290 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16291 // CHECK: ret void
// Verifies vst2_u16 lowers to @llvm.arm.neon.vst2.p0i8.v4i16 (align 2);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_u16(a, b);
}
16295
16296 // CHECK-LABEL: @test_vst2_u32(
16297 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
16298 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
16299 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
16300 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16301 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16302 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
16303 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
16304 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16305 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16306 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16307 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16308 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16309 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16310 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16311 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16312 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16313 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16314 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16315 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16316 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16317 // CHECK: ret void
// Verifies vst2_u32 lowers to @llvm.arm.neon.vst2.p0i8.v2i32 (align 4);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_u32(a, b);
}
16321
16322 // CHECK-LABEL: @test_vst2_u64(
16323 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
16324 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
16325 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
16326 // CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16327 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16328 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
16329 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
16330 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16331 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
16332 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16333 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16334 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16335 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16336 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16337 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16338 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16339 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16340 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16341 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16342 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16343 // CHECK: ret void
// Verifies vst2_u64 lowers to @llvm.arm.neon.vst2.p0i8.v1i64 (align 4);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_u64(uint64_t * a, uint64x1x2_t b) {
  vst2_u64(a, b);
}
16347
16348 // CHECK-LABEL: @test_vst2_s8(
16349 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
16350 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
16351 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
16352 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16353 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16354 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
16355 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
16356 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16357 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16358 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16359 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16360 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16361 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16362 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16363 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16364 // CHECK: ret void
// Verifies vst2_s8 lowers to @llvm.arm.neon.vst2.p0i8.v8i8 (align 1, no
// bitcasts for the i8 element type); expectations are the autogenerated
// CHECK lines above — do not hand-edit.
void test_vst2_s8(int8_t * a, int8x8x2_t b) {
  vst2_s8(a, b);
}
16368
16369 // CHECK-LABEL: @test_vst2_s16(
16370 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
16371 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
16372 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
16373 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16374 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16375 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
16376 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
16377 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16378 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16379 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16380 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16381 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16382 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16383 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16384 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16385 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16386 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16387 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16388 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16389 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16390 // CHECK: ret void
// Verifies vst2_s16 lowers to @llvm.arm.neon.vst2.p0i8.v4i16 (align 2);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_s16(int16_t * a, int16x4x2_t b) {
  vst2_s16(a, b);
}
16394
16395 // CHECK-LABEL: @test_vst2_s32(
16396 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
16397 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
16398 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
16399 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16400 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16401 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
16402 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
16403 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16404 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16405 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16406 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16407 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16408 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16409 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16410 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16411 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16412 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16413 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16414 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16415 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16416 // CHECK: ret void
// Verifies vst2_s32 lowers to @llvm.arm.neon.vst2.p0i8.v2i32 (align 4);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_s32(int32_t * a, int32x2x2_t b) {
  vst2_s32(a, b);
}
16420
16421 // CHECK-LABEL: @test_vst2_s64(
16422 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
16423 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
16424 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
16425 // CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16426 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16427 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
16428 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
16429 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16430 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
16431 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16432 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16433 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16434 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16435 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16436 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16437 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16438 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16439 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16440 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16441 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16442 // CHECK: ret void
// Verifies vst2_s64 lowers to @llvm.arm.neon.vst2.p0i8.v1i64 (align 4);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_s64(int64_t * a, int64x1x2_t b) {
  vst2_s64(a, b);
}
16446
16447 // CHECK-LABEL: @test_vst2_f16(
16448 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
16449 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
16450 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
16451 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
16452 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16453 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
16454 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
16455 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16456 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16457 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16458 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
16459 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16460 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
16461 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16462 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
16463 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16464 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
16465 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
16466 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
16467 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 2)
16468 // CHECK: ret void
// Verifies vst2_f16 lowers to @llvm.arm.neon.vst2.p0i8.v4f16 (align 2);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_f16(float16_t * a, float16x4x2_t b) {
  vst2_f16(a, b);
}
16472
16473 // CHECK-LABEL: @test_vst2_f32(
16474 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
16475 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
16476 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
16477 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
16478 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16479 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
16480 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
16481 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16482 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16483 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16484 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
16485 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16486 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
16487 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16488 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
16489 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16490 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
16491 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
16492 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
16493 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
16494 // CHECK: ret void
// Verifies vst2_f32 lowers to @llvm.arm.neon.vst2.p0i8.v2f32 (align 4);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_f32(float32_t * a, float32x2x2_t b) {
  vst2_f32(a, b);
}
16498
16499 // CHECK-LABEL: @test_vst2_p8(
16500 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
16501 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
16502 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
16503 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16504 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16505 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
16506 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
16507 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16508 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16509 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16510 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16511 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16512 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16513 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16514 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16515 // CHECK: ret void
// Verifies vst2_p8 lowers to @llvm.arm.neon.vst2.p0i8.v8i8 (align 1, no
// bitcasts for the i8 element type); expectations are the autogenerated
// CHECK lines above — do not hand-edit.
void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_p8(a, b);
}
16519
16520 // CHECK-LABEL: @test_vst2_p16(
16521 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
16522 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
16523 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
16524 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16525 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16526 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
16527 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
16528 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16529 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16530 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16531 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16532 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16533 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16534 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16535 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16536 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16537 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16538 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16539 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16540 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16541 // CHECK: ret void
// Verifies vst2_p16 lowers to @llvm.arm.neon.vst2.p0i8.v4i16 (align 2);
// expectations are the autogenerated CHECK lines above — do not hand-edit.
void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_p16(a, b);
}
16545
16546 // CHECK-LABEL: @test_vst2q_lane_u16(
16547 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
16548 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
16549 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
16550 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16551 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16552 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
16553 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
16554 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16555 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16556 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
16557 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16558 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16559 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16560 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
16561 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16562 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16563 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16564 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16565 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16566 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16567 // CHECK: ret void
// Verified by the CHECK lines above: vst2q_lane_u16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i16 with lane 7 and 2-byte alignment.
void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_lane_u16(a, b, 7);
}
16571
16572 // CHECK-LABEL: @test_vst2q_lane_u32(
16573 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
16574 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
16575 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
16576 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
16577 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16578 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
16579 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
16580 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16581 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16582 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
16583 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
16584 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16585 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16586 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
16587 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16588 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16589 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16590 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16591 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16592 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
16593 // CHECK: ret void
// Verified by the CHECK lines above: vst2q_lane_u32 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i32 with lane 3 and 4-byte alignment.
void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_lane_u32(a, b, 3);
}
16597
16598 // CHECK-LABEL: @test_vst2q_lane_s16(
16599 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
16600 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
16601 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
16602 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16603 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16604 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
16605 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
16606 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16607 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16608 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
16609 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16610 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16611 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16612 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
16613 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16614 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16615 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16616 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16617 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16618 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16619 // CHECK: ret void
// Verified by the CHECK lines above: vst2q_lane_s16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i16 with lane 7 and 2-byte alignment.
void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
  vst2q_lane_s16(a, b, 7);
}
16623
16624 // CHECK-LABEL: @test_vst2q_lane_s32(
16625 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
16626 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
16627 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
16628 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
16629 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16630 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
16631 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
16632 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16633 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16634 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16635 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
16636 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16637 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16638 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16639 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16640 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16641 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16642 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16643 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16644 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
16645 // CHECK: ret void
// Verified by the CHECK lines above: vst2q_lane_s32 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i32 with lane 3 and 4-byte alignment.
void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
  vst2q_lane_s32(a, b, 3);
}
16649
16650 // CHECK-LABEL: @test_vst2q_lane_f16(
16651 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
16652 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
16653 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
16654 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
16655 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16656 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
16657 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
16658 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16659 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16660 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16661 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
16662 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
16663 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
16664 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16665 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
16666 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
16667 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
16668 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
16669 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
16670 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 7, i32 2)
16671 // CHECK: ret void
// Verified by the CHECK lines above: vst2q_lane_f16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8f16 with lane 7 and 2-byte alignment.
void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
  vst2q_lane_f16(a, b, 7);
}
16675
16676 // CHECK-LABEL: @test_vst2q_lane_f32(
16677 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
16678 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
16679 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
16680 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
16681 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16682 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
16683 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
16684 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16685 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16686 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16687 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
16688 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
16689 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
16690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
16692 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
16693 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
16694 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
16695 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
16696 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
16697 // CHECK: ret void
// Verified by the CHECK lines above: vst2q_lane_f32 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4f32 with lane 3 and 4-byte alignment.
void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
  vst2q_lane_f32(a, b, 3);
}
16701
16702 // CHECK-LABEL: @test_vst2q_lane_p16(
16703 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
16704 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
16705 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
16706 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16707 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16708 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
16709 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
16710 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16711 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16712 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16713 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16714 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16715 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16716 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16717 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16718 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16719 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16720 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16721 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16722 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16723 // CHECK: ret void
// Verified by the CHECK lines above: vst2q_lane_p16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i16 with lane 7 and 2-byte alignment.
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_lane_p16(a, b, 7);
}
16727
16728 // CHECK-LABEL: @test_vst2_lane_u8(
16729 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
16730 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
16731 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
16732 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16733 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16734 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
16735 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
16736 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16737 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16738 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16739 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16740 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16741 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16742 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16743 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16744 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_u8 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i8 with lane 7 and 1-byte alignment.
void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_lane_u8(a, b, 7);
}
16748
16749 // CHECK-LABEL: @test_vst2_lane_u16(
16750 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
16751 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
16752 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
16753 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16754 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16755 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
16756 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
16757 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16758 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16759 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16760 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16761 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16762 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16763 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16764 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16765 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16766 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16767 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16768 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16769 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16770 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_u16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i16 with lane 3 and 2-byte alignment.
void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_lane_u16(a, b, 3);
}
16774
16775 // CHECK-LABEL: @test_vst2_lane_u32(
16776 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
16777 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
16778 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
16779 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16780 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16781 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
16782 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
16783 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16784 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16785 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16786 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16787 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16788 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16789 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16790 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16791 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16792 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16793 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16794 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16795 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
16796 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_u32 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v2i32 with lane 1 and 4-byte alignment.
void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_lane_u32(a, b, 1);
}
16800
16801 // CHECK-LABEL: @test_vst2_lane_s8(
16802 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
16803 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
16804 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
16805 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16806 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16807 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
16808 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
16809 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16810 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16811 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16812 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16813 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16814 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16815 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16816 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16817 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_s8 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i8 with lane 7 and 1-byte alignment.
void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
  vst2_lane_s8(a, b, 7);
}
16821
16822 // CHECK-LABEL: @test_vst2_lane_s16(
16823 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
16824 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
16825 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
16826 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16827 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16828 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
16829 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
16830 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16831 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16832 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16833 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16834 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16835 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16836 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16837 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16838 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16839 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16840 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16841 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16842 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16843 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_s16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i16 with lane 3 and 2-byte alignment.
void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
  vst2_lane_s16(a, b, 3);
}
16847
16848 // CHECK-LABEL: @test_vst2_lane_s32(
16849 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
16850 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
16851 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
16852 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16853 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16854 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
16855 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
16856 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16857 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16858 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16860 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16861 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16862 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16863 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16864 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16865 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16866 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16867 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16868 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
16869 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_s32 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v2i32 with lane 1 and 4-byte alignment.
void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
  vst2_lane_s32(a, b, 1);
}
16873
16874 // CHECK-LABEL: @test_vst2_lane_f16(
16875 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
16876 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
16877 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
16878 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
16879 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16880 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
16881 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
16882 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16883 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16884 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16885 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
16886 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16887 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
16888 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16889 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
16890 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16891 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
16892 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
16893 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
16894 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 3, i32 2)
16895 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_f16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4f16 with lane 3 and 2-byte alignment.
void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
  vst2_lane_f16(a, b, 3);
}
16899
16900 // CHECK-LABEL: @test_vst2_lane_f32(
16901 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
16902 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
16903 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
16904 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
16905 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16906 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
16907 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
16908 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16909 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16910 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16911 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
16912 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16913 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
16914 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16915 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
16916 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16917 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
16918 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
16919 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
16920 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
16921 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_f32 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v2f32 with lane 1 and 4-byte alignment.
void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
  vst2_lane_f32(a, b, 1);
}
16925
16926 // CHECK-LABEL: @test_vst2_lane_p8(
16927 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
16928 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
16929 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
16930 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16931 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16932 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
16933 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
16934 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16935 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16936 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16937 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16938 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16939 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16940 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16941 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16942 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_p8 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i8 with lane 7 and 1-byte alignment.
void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_lane_p8(a, b, 7);
}
16946
16947 // CHECK-LABEL: @test_vst2_lane_p16(
16948 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
16949 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
16950 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
16951 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16952 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16953 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
16954 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
16955 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16956 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16957 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16958 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16959 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16960 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16961 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16962 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16963 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16964 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16965 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16966 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16967 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16968 // CHECK: ret void
// Verified by the CHECK lines above: vst2_lane_p16 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i16 with lane 3 and 2-byte alignment.
void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_lane_p16(a, b, 3);
}
16972
16973 // CHECK-LABEL: @test_vst3q_u8(
16974 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
16975 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
16976 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
16977 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
16978 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
16979 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
16980 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
16981 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
16982 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16983 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
16984 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16985 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16986 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
16987 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16988 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16989 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
16990 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16991 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
16992 // CHECK: ret void
// Exercises vst3q_u8; the CHECK block above pins the lowering to
// @llvm.arm.neon.vst3.p0i8.v16i8 with no pointer bitcast (i8 element type).
void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}
16996
16997 // CHECK-LABEL: @test_vst3q_u16(
16998 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
16999 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
17000 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
17001 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17002 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17003 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
17004 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
17005 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17006 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17007 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17008 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17009 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17010 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17011 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17012 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17013 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17014 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17015 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17016 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17017 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17018 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17019 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17020 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17021 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17022 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17023 // CHECK: ret void
// Exercises vst3q_u16; the CHECK block above pins the lowering to
// @llvm.arm.neon.vst3.p0i8.v8i16 (pointer bitcast to i8*, alignment 2).
void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}
17027
17028 // CHECK-LABEL: @test_vst3q_u32(
17029 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
17030 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
17031 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
17032 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17033 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17034 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
17035 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
17036 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17037 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17038 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17039 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17040 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17041 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17042 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17043 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17044 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17045 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17046 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17047 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17048 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17049 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17050 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17051 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17052 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17053 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
17054 // CHECK: ret void
// Exercises vst3q_u32; the CHECK block above pins the lowering to
// @llvm.arm.neon.vst3.p0i8.v4i32 (pointer bitcast to i8*, alignment 4).
void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}
17058
17059 // CHECK-LABEL: @test_vst3q_s8(
17060 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
17061 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
17062 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
17063 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
17064 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17065 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
17066 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
17067 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17068 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17069 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
17070 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17071 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17072 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
17073 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17074 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17075 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
17076 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17077 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
17078 // CHECK: ret void
// Exercises vst3q_s8; lowers to the same @llvm.arm.neon.vst3.p0i8.v16i8
// intrinsic as the unsigned variant (signedness is irrelevant to a store).
void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
  vst3q_s8(a, b);
}
17082
17083 // CHECK-LABEL: @test_vst3q_s16(
17084 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
17085 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
17086 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
17087 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17088 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17089 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
17090 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
17091 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17092 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17093 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17094 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17095 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17096 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17097 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17098 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17099 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17100 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17101 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17102 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17103 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17104 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17105 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17106 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17107 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17108 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17109 // CHECK: ret void
// Exercises vst3q_s16; lowers to @llvm.arm.neon.vst3.p0i8.v8i16 per the
// CHECK block above — identical IR to the u16 variant.
void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
  vst3q_s16(a, b);
}
17113
17114 // CHECK-LABEL: @test_vst3q_s32(
17115 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
17116 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
17117 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
17118 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17119 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17120 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
17121 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
17122 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17123 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17124 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17125 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17126 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17127 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17128 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17129 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17130 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17131 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17132 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17133 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17134 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17135 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17136 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17137 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17138 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17139 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
17140 // CHECK: ret void
// Exercises vst3q_s32; lowers to @llvm.arm.neon.vst3.p0i8.v4i32 per the
// CHECK block above — identical IR to the u32 variant.
void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
  vst3q_s32(a, b);
}
17144
17145 // CHECK-LABEL: @test_vst3q_f16(
17146 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
17147 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
17148 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
17149 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
17150 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17151 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
17152 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
17153 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17154 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
17155 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17156 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
17157 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
17158 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
17159 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17160 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
17161 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
17162 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
17163 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17164 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
17165 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
17166 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
17167 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
17168 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
17169 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
17170 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 2)
17171 // CHECK: ret void
// Exercises vst3q_f16 (requires -fallow-half-arguments-and-returns and
// +fullfp16 from the RUN lines); lowers to @llvm.arm.neon.vst3.p0i8.v8f16.
void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
  vst3q_f16(a, b);
}
17175
17176 // CHECK-LABEL: @test_vst3q_f32(
17177 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
17178 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
17179 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
17180 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
17181 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17182 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
17183 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
17184 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17185 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
17186 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17187 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
17188 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
17189 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
17190 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17191 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
17192 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
17193 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
17194 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17195 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
17196 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
17197 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
17198 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
17199 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
17200 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
17201 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
17202 // CHECK: ret void
// Exercises vst3q_f32; lowers to @llvm.arm.neon.vst3.p0i8.v4f32 per the
// CHECK block above (alignment 4).
void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
  vst3q_f32(a, b);
}
17206
17207 // CHECK-LABEL: @test_vst3q_p8(
17208 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
17209 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
17210 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
17211 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
17212 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17213 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
17214 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
17215 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17216 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
17217 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
17218 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17219 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
17220 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
17221 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17222 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
17223 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
17224 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17225 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
17226 // CHECK: ret void
// Exercises vst3q_p8; polynomial types share the integer lowering, so this
// emits the same @llvm.arm.neon.vst3.p0i8.v16i8 IR as the u8/s8 variants.
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}
17230
17231 // CHECK-LABEL: @test_vst3q_p16(
17232 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
17233 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
17234 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
17235 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17236 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17237 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
17238 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
17239 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17240 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17241 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17242 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17243 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17244 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17245 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17246 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17247 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17248 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17249 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17250 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17251 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17252 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17253 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17254 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17255 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17256 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17257 // CHECK: ret void
// Exercises vst3q_p16; lowers to @llvm.arm.neon.vst3.p0i8.v8i16 per the
// CHECK block above — identical IR to the u16/s16 variants.
void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}
17261
17262 // CHECK-LABEL: @test_vst3_u8(
17263 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
17264 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
17265 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
17266 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17267 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17268 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
17269 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
17270 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17271 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17272 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17273 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17274 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17275 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17276 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17277 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17278 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17279 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17280 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
17281 // CHECK: ret void
// Exercises the 64-bit (d-register) vst3_u8; the CHECK block above pins the
// lowering to @llvm.arm.neon.vst3.p0i8.v8i8 (8-byte vectors, align 8 structs).
void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_u8(a, b);
}
17285
17286 // CHECK-LABEL: @test_vst3_u16(
17287 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
17288 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
17289 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
17290 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17291 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17292 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
17293 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
17294 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17295 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17296 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17297 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17298 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17299 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17300 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17301 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17302 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17303 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17304 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17305 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17306 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17307 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17308 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17309 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17310 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17311 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
17312 // CHECK: ret void
// Exercises the 64-bit vst3_u16; lowers to @llvm.arm.neon.vst3.p0i8.v4i16
// per the CHECK block above (pointer bitcast to i8*, alignment 2).
void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_u16(a, b);
}
17316
17317 // CHECK-LABEL: @test_vst3_u32(
17318 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
17319 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
17320 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
17321 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
17322 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17323 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
17324 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
17325 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17326 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17327 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17328 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
17329 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17330 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
17331 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17332 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
17333 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17334 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
17335 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17336 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
17337 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17338 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
17339 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
17340 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
17341 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
17342 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
17343 // CHECK: ret void
// Exercises the 64-bit vst3_u32; lowers to @llvm.arm.neon.vst3.p0i8.v2i32
// per the CHECK block above (alignment 4).
void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_u32(a, b);
}
17347
17348 // CHECK-LABEL: @test_vst3_u64(
17349 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
17350 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
17351 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
17352 // CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
17353 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17354 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
17355 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
17356 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17357 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
17358 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
17359 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
17360 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
17361 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
17362 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
17363 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
17364 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
17365 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
17366 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
17367 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
17368 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
17369 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
17370 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
17371 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
17372 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
17373 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
17374 // CHECK: ret void
// Exercises vst3_u64 (single-element <1 x i64> vectors); lowers to
// @llvm.arm.neon.vst3.p0i8.v1i64 per the CHECK block above.
void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
  vst3_u64(a, b);
}
17378
// vst3_s8: the x3 struct arrives coerced as [3 x i64], is spilled and memcpy'd
// to a local copy, each <8 x i8> element is reloaded, and the store lowers to
// @llvm.arm.neon.vst3.p0i8.v8i8 with element alignment 1. (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_s8(int8_t * a, int8x8x3_t b) {
  vst3_s8(a, b);
}
17402
// vst3_s16: i16 elements round-trip through <8 x i8> bitcasts and the pointer is
// cast to i8*; lowers to @llvm.arm.neon.vst3.p0i8.v4i16 with alignment 2.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_s16(int16_t * a, int16x4x3_t b) {
  vst3_s16(a, b);
}
17433
// vst3_s32: same coerce/spill/reload pattern as the other d-register vst3 tests;
// lowers to @llvm.arm.neon.vst3.p0i8.v2i32 with alignment 4.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_s32(int32_t * a, int32x2x3_t b) {
  vst3_s32(a, b);
}
17464
// vst3_s64: single-element <1 x i64> vectors; lowers to
// @llvm.arm.neon.vst3.p0i8.v1i64 with alignment 4 (APCS-gnu caps i64 alignment).
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_s64(int64_t * a, int64x1x3_t b) {
  vst3_s64(a, b);
}
17495
// vst3_f16: half-precision vectors (+fullfp16 keeps them as <4 x half>);
// lowers to @llvm.arm.neon.vst3.p0i8.v4f16 with alignment 2.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_f16(float16_t * a, float16x4x3_t b) {
  vst3_f16(a, b);
}
17526
// vst3_f32: single-precision d-register variant; lowers to
// @llvm.arm.neon.vst3.p0i8.v2f32 with alignment 4.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_f32(float32_t * a, float32x2x3_t b) {
  vst3_f32(a, b);
}
17557
// vst3_p8: polynomial 8-bit variant; identical IR shape to vst3_s8 (no element
// bitcasts needed for i8), lowering to @llvm.arm.neon.vst3.p0i8.v8i8, alignment 1.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
17581
// vst3_p16: polynomial 16-bit variant; same IR shape as vst3_s16, lowering to
// @llvm.arm.neon.vst3.p0i8.v4i16 with alignment 2.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
17612
// vst3q_lane_u16: q-register x3 struct (coerced as [6 x i64], 48-byte copy,
// 16-byte alignment); lowers to @llvm.arm.neon.vst3lane.p0i8.v8i16 with
// lane index 7 and alignment 2. (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_lane_u16(a, b, 7);
}
17643
// vst3q_lane_u32: q-register lane store; lowers to
// @llvm.arm.neon.vst3lane.p0i8.v4i32 with lane index 3 and alignment 4.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_lane_u32(a, b, 3);
}
17674
// vst3q_lane_s16: signed variant, identical lowering to the u16 lane test:
// @llvm.arm.neon.vst3lane.p0i8.v8i16, lane 7, alignment 2.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
  vst3q_lane_s16(a, b, 7);
}
17705
// vst3q_lane_s32: signed variant, identical lowering to the u32 lane test:
// @llvm.arm.neon.vst3lane.p0i8.v4i32, lane 3, alignment 4.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
  vst3q_lane_s32(a, b, 3);
}
17736
// vst3q_lane_f16: half-precision q-register lane store; lowers to
// @llvm.arm.neon.vst3lane.p0i8.v8f16 with lane index 7 and alignment 2.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
  vst3q_lane_f16(a, b, 7);
}
17767
// vst3q_lane_f32: single-precision q-register lane store; lowers to
// @llvm.arm.neon.vst3lane.p0i8.v4f32 with lane index 3 and alignment 4.
// (Plain comment; ignored by FileCheck.)
// CHECK-LABEL: @test_vst3q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) {
  vst3q_lane_f32(a, b, 3);
}
17798
// Checks vst3q_lane_p16: poly16x8x3_t is coerced through alloca+memcpy and the
// store lowers to @llvm.arm.neon.vst3lane.p0i8.v8i16 with lane 7, alignment 2.
17799 // CHECK-LABEL: @test_vst3q_lane_p16(
17800 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
17801 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
17802 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
17803 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17804 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17805 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
17806 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
17807 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17808 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17809 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17810 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17811 // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17812 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17813 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17814 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17815 // CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17816 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17817 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17818 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17819 // CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17820 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17821 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17822 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17823 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17824 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
17825 // CHECK:   ret void
test_vst3q_lane_p16(poly16_t * a,poly16x8x3_t b)17826 void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) {
17827   vst3q_lane_p16(a, b, 7);
17828 }
17829
// Checks vst3_lane_u8: i8 elements need no bitcasts and the pointer is passed
// directly; lowers to @llvm.arm.neon.vst3lane.p0i8.v8i8 with lane 7, alignment 1.
17830 // CHECK-LABEL: @test_vst3_lane_u8(
17831 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
17832 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
17833 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
17834 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17835 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17836 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
17837 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
17838 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17839 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17840 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17841 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17842 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17843 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17844 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17845 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17846 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17847 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17848 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
17849 // CHECK:   ret void
test_vst3_lane_u8(uint8_t * a,uint8x8x3_t b)17850 void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) {
17851   vst3_lane_u8(a, b, 7);
17852 }
17853
// Checks vst3_lane_u16: lowers to @llvm.arm.neon.vst3lane.p0i8.v4i16 with
// lane 3 and alignment 2, after the usual alloca+memcpy aggregate coercion.
17854 // CHECK-LABEL: @test_vst3_lane_u16(
17855 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
17856 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
17857 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
17858 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17859 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17860 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
17861 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
17862 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17863 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17864 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17865 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17866 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17867 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17868 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17869 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17870 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17871 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17872 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17873 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17874 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17875 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17876 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17877 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17878 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17879 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
17880 // CHECK:   ret void
test_vst3_lane_u16(uint16_t * a,uint16x4x3_t b)17881 void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) {
17882   vst3_lane_u16(a, b, 3);
17883 }
17884
// Checks vst3_lane_u32: lowers to @llvm.arm.neon.vst3lane.p0i8.v2i32 with
// lane 1 and alignment 4.
17885 // CHECK-LABEL: @test_vst3_lane_u32(
17886 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
17887 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
17888 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
17889 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
17890 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17891 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
17892 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
17893 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17894 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
17895 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17896 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
17897 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17898 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
17899 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17900 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
17901 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17902 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
17903 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17904 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
17905 // CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17906 // CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
17907 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
17908 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
17909 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
17910 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
17911 // CHECK:   ret void
test_vst3_lane_u32(uint32_t * a,uint32x2x3_t b)17912 void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) {
17913   vst3_lane_u32(a, b, 1);
17914 }
17915
// Checks vst3_lane_s8: identical lowering to the u8 variant —
// @llvm.arm.neon.vst3lane.p0i8.v8i8 with lane 7 and alignment 1.
17916 // CHECK-LABEL: @test_vst3_lane_s8(
17917 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
17918 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
17919 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
17920 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17921 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17922 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
17923 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
17924 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17925 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17926 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17927 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17928 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17929 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17930 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17931 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17932 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17933 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17934 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
17935 // CHECK:   ret void
test_vst3_lane_s8(int8_t * a,int8x8x3_t b)17936 void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) {
17937   vst3_lane_s8(a, b, 7);
17938 }
17939
// Checks vst3_lane_s16: lowers to @llvm.arm.neon.vst3lane.p0i8.v4i16 with
// lane 3 and alignment 2 (same shape as the u16 variant).
17940 // CHECK-LABEL: @test_vst3_lane_s16(
17941 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
17942 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
17943 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
17944 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17945 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17946 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
17947 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
17948 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17949 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17950 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17951 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17952 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17953 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17954 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17955 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17956 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17957 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17958 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17959 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17960 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17961 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17962 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17963 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17964 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17965 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
17966 // CHECK:   ret void
test_vst3_lane_s16(int16_t * a,int16x4x3_t b)17967 void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) {
17968   vst3_lane_s16(a, b, 3);
17969 }
17970
// Checks vst3_lane_s32: lowers to @llvm.arm.neon.vst3lane.p0i8.v2i32 with
// lane 1 and alignment 4 (same shape as the u32 variant).
17971 // CHECK-LABEL: @test_vst3_lane_s32(
17972 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
17973 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
17974 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
17975 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
17976 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17977 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
17978 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
17979 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17980 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
17981 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17982 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
17983 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17984 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
17985 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17986 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
17987 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17988 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
17989 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17990 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
17991 // CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17992 // CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
17993 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
17994 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
17995 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
17996 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
17997 // CHECK:   ret void
test_vst3_lane_s32(int32_t * a,int32x2x3_t b)17998 void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
17999   vst3_lane_s32(a, b, 1);
18000 }
18001
// Checks vst3_lane_f16 (requires +fullfp16 per the RUN line): lowers to
// @llvm.arm.neon.vst3lane.p0i8.v4f16 with lane 3 and alignment 2.
18002 // CHECK-LABEL: @test_vst3_lane_f16(
18003 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
18004 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
18005 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
18006 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
18007 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
18008 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
18009 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
18010 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
18011 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
18012 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
18013 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
18014 // CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
18015 // CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
18016 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
18017 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
18018 // CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
18019 // CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
18020 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
18021 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
18022 // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
18023 // CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
18024 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
18025 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
18026 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
18027 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 3, i32 2)
18028 // CHECK:   ret void
test_vst3_lane_f16(float16_t * a,float16x4x3_t b)18029 void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
18030   vst3_lane_f16(a, b, 3);
18031 }
18032
// Checks vst3_lane_f32: lowers to @llvm.arm.neon.vst3lane.p0i8.v2f32 with
// lane 1 and alignment 4.
18033 // CHECK-LABEL: @test_vst3_lane_f32(
18034 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
18035 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
18036 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
18037 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
18038 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
18039 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
18040 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
18041 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
18042 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
18043 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
18044 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
18045 // CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
18046 // CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
18047 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
18048 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
18049 // CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
18050 // CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
18051 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
18052 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
18053 // CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
18054 // CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
18055 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
18056 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
18057 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
18058 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
18059 // CHECK:   ret void
test_vst3_lane_f32(float32_t * a,float32x2x3_t b)18060 void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
18061   vst3_lane_f32(a, b, 1);
18062 }
18063
// Checks vst3_lane_p8: same v8i8 lowering as u8/s8 —
// @llvm.arm.neon.vst3lane.p0i8.v8i8 with lane 7 and alignment 1.
18064 // CHECK-LABEL: @test_vst3_lane_p8(
18065 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
18066 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
18067 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
18068 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
18069 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
18070 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
18071 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
18072 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
18073 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
18074 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
18075 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18076 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
18077 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18078 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18079 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
18080 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18081 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18082 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
18083 // CHECK:   ret void
test_vst3_lane_p8(poly8_t * a,poly8x8x3_t b)18084 void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
18085   vst3_lane_p8(a, b, 7);
18086 }
18087
// Checks vst3_lane_p16: same v4i16 lowering as u16/s16 —
// @llvm.arm.neon.vst3lane.p0i8.v4i16 with lane 3 and alignment 2.
18088 // CHECK-LABEL: @test_vst3_lane_p16(
18089 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
18090 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
18091 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
18092 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
18093 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
18094 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
18095 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
18096 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
18097 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
18098 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
18099 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
18100 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18101 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18102 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
18103 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18104 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18105 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18106 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
18107 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18108 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18109 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18110 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18111 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18112 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18113 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
18114 // CHECK:   ret void
test_vst3_lane_p16(poly16_t * a,poly16x4x3_t b)18115 void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
18116   vst3_lane_p16(a, b, 3);
18117 }
18118
// Checks vst4q_u8 (whole-structure store, no lane): four <16 x i8> values are
// loaded from the coerced aggregate and passed to @llvm.arm.neon.vst4.p0i8.v16i8
// with alignment 1.
18119 // CHECK-LABEL: @test_vst4q_u8(
18120 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
18121 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
18122 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
18123 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
18124 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18125 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
18126 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
18127 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18128 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18129 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
18130 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
18131 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18132 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
18133 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
18134 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18135 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
18136 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
18137 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18138 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
18139 // CHECK:   [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
18140 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
18141 // CHECK:   ret void
test_vst4q_u8(uint8_t * a,uint8x16x4_t b)18142 void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
18143   vst4q_u8(a, b);
18144 }
18145
18146 // CHECK-LABEL: @test_vst4q_u16(
18147 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
18148 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
18149 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
18150 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18151 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18152 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
18153 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
18154 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18155 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18156 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18157 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18158 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18159 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18160 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18161 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18162 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18163 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18164 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18165 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18166 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18167 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18168 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18169 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18170 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18171 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18172 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18173 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18174 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18175 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18176 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
18177 // CHECK: ret void
// Verifies vst4q_u16 lowers to @llvm.arm.neon.vst4.p0i8.v8i16 with alignment 2
// (see autogenerated CHECK lines above; do not edit the IR checks by hand).
test_vst4q_u16(uint16_t * a,uint16x8x4_t b)18178 void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
18179   vst4q_u16(a, b);
18180 }
18181
18182 // CHECK-LABEL: @test_vst4q_u32(
18183 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
18184 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
18185 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
18186 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
18187 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18188 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
18189 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
18190 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18191 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18192 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18193 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
18194 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
18195 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
18196 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18197 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
18198 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
18199 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
18200 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18201 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
18202 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
18203 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
18204 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18205 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
18206 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
18207 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
18208 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
18209 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
18210 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
18211 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
18212 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
18213 // CHECK: ret void
// Verifies vst4q_u32 lowers to @llvm.arm.neon.vst4.p0i8.v4i32 with alignment 4
// (see autogenerated CHECK lines above; do not edit the IR checks by hand).
test_vst4q_u32(uint32_t * a,uint32x4x4_t b)18214 void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
18215   vst4q_u32(a, b);
18216 }
18217
18218 // CHECK-LABEL: @test_vst4q_s8(
18219 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
18220 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
18221 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
18222 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
18223 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18224 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
18225 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
18226 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18227 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18228 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
18229 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
18230 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18231 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
18232 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
18233 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18234 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
18235 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
18236 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18237 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
18238 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
18239 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
18240 // CHECK: ret void
// Verifies vst4q_s8 lowers to @llvm.arm.neon.vst4.p0i8.v16i8 with alignment 1;
// i8 vectors need no bitcasts, so the IR is shorter than the wider-element cases.
test_vst4q_s8(int8_t * a,int8x16x4_t b)18241 void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
18242   vst4q_s8(a, b);
18243 }
18244
18245 // CHECK-LABEL: @test_vst4q_s16(
18246 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
18247 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
18248 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
18249 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18250 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18251 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
18252 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
18253 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18254 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18255 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18256 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18257 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18258 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18259 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18260 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18261 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18262 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18263 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18264 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18265 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18266 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18267 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18268 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18269 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18270 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18271 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18272 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18273 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18274 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18275 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
18276 // CHECK: ret void
// Verifies vst4q_s16 lowers to @llvm.arm.neon.vst4.p0i8.v8i16 with alignment 2
// (signed/unsigned variants share the same store intrinsic).
test_vst4q_s16(int16_t * a,int16x8x4_t b)18277 void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
18278   vst4q_s16(a, b);
18279 }
18280
18281 // CHECK-LABEL: @test_vst4q_s32(
18282 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
18283 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
18284 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
18285 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
18286 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18287 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
18288 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
18289 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18290 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18291 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18292 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
18293 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
18294 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
18295 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18296 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
18297 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
18298 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
18299 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18300 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
18301 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
18302 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
18303 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18304 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
18305 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
18306 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
18307 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
18308 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
18309 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
18310 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
18311 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
18312 // CHECK: ret void
// Verifies vst4q_s32 lowers to @llvm.arm.neon.vst4.p0i8.v4i32 with alignment 4
// (see autogenerated CHECK lines above; do not edit the IR checks by hand).
test_vst4q_s32(int32_t * a,int32x4x4_t b)18313 void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
18314   vst4q_s32(a, b);
18315 }
18316
18317 // CHECK-LABEL: @test_vst4q_f16(
18318 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
18319 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
18320 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
18321 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
18322 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18323 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
18324 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
18325 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18326 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
18327 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18328 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
18329 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
18330 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
18331 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18332 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
18333 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
18334 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
18335 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18336 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
18337 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
18338 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
18339 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18340 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
18341 // CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
18342 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
18343 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
18344 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
18345 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
18346 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
18347 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 2)
18348 // CHECK: ret void
// Verifies vst4q_f16 lowers to @llvm.arm.neon.vst4.p0i8.v8f16 with alignment 2
// (requires the +fullfp16 / half-argument flags on the RUN line).
test_vst4q_f16(float16_t * a,float16x8x4_t b)18349 void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
18350   vst4q_f16(a, b);
18351 }
18352
18353 // CHECK-LABEL: @test_vst4q_f32(
18354 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
18355 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
18356 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
18357 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
18358 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18359 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
18360 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
18361 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18362 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
18363 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18364 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
18365 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
18366 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
18367 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18368 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
18369 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
18370 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
18371 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18372 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
18373 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
18374 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
18375 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18376 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
18377 // CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
18378 // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
18379 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
18380 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
18381 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
18382 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
18383 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
18384 // CHECK: ret void
// Verifies vst4q_f32 lowers to @llvm.arm.neon.vst4.p0i8.v4f32 with alignment 4
// (see autogenerated CHECK lines above; do not edit the IR checks by hand).
test_vst4q_f32(float32_t * a,float32x4x4_t b)18385 void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
18386   vst4q_f32(a, b);
18387 }
18388
18389 // CHECK-LABEL: @test_vst4q_p8(
18390 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
18391 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
18392 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
18393 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
18394 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18395 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
18396 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
18397 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18398 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18399 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
18400 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
18401 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18402 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
18403 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
18404 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18405 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
18406 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
18407 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18408 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
18409 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
18410 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
18411 // CHECK: ret void
// Verifies vst4q_p8 lowers to @llvm.arm.neon.vst4.p0i8.v16i8 with alignment 1;
// poly8 uses the same IR as int8, no bitcasts needed.
test_vst4q_p8(poly8_t * a,poly8x16x4_t b)18412 void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
18413   vst4q_p8(a, b);
18414 }
18415
18416 // CHECK-LABEL: @test_vst4q_p16(
18417 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
18418 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
18419 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
18420 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18421 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18422 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
18423 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
18424 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18425 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18426 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18427 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18428 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18429 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18430 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18431 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18432 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18433 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18434 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18435 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18436 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18437 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18438 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18439 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18440 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18441 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18442 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18443 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18444 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18445 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18446 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
18447 // CHECK: ret void
// Verifies vst4q_p16 lowers to @llvm.arm.neon.vst4.p0i8.v8i16 with alignment 2
// (poly16 shares the i16 store intrinsic).
test_vst4q_p16(poly16_t * a,poly16x8x4_t b)18448 void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
18449   vst4q_p16(a, b);
18450 }
18451
18452 // CHECK-LABEL: @test_vst4_u8(
18453 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
18454 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
18455 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
18456 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
18457 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18458 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
18459 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
18460 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18461 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18462 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
18463 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18464 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18465 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18466 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18467 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18468 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18469 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18470 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18471 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
18472 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
18473 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
18474 // CHECK: ret void
// Verifies the 64-bit (D-register) vst4_u8 lowers to @llvm.arm.neon.vst4.p0i8.v8i8
// with alignment 1; the x4 struct is passed coerced as [4 x i64].
test_vst4_u8(uint8_t * a,uint8x8x4_t b)18475 void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
18476   vst4_u8(a, b);
18477 }
18478
18479 // CHECK-LABEL: @test_vst4_u16(
18480 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
18481 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
18482 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
18483 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
18484 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18485 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
18486 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
18487 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18488 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18489 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18490 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
18491 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18492 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18493 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18494 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18495 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18496 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18497 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18498 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18499 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18500 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18501 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18502 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
18503 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
18504 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18505 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18506 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18507 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18508 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18509 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
18510 // CHECK: ret void
// Verifies the 64-bit (D-register) vst4_u16 lowers to @llvm.arm.neon.vst4.p0i8.v4i16
// with alignment 2 (see autogenerated CHECK lines above).
test_vst4_u16(uint16_t * a,uint16x4x4_t b)18511 void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
18512   vst4_u16(a, b);
18513 }
18514
18515 // CHECK-LABEL: @test_vst4_u32(
18516 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
18517 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
18518 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
18519 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
18520 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18521 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
18522 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
18523 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18524 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18525 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18526 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
18527 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
18528 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
18529 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18530 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
18531 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
18532 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
18533 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18534 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
18535 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
18536 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
18537 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18538 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
18539 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
18540 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
18541 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
18542 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
18543 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
18544 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
18545 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
18546 // CHECK: ret void
// Verifies the 64-bit (D-register) vst4_u32 lowers to @llvm.arm.neon.vst4.p0i8.v2i32
// with alignment 4 (see autogenerated CHECK lines above).
test_vst4_u32(uint32_t * a,uint32x2x4_t b)18547 void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
18548   vst4_u32(a, b);
18549 }
18550
// Auto-generated FileCheck test: vst4_u64 must lower to
// @llvm.arm.neon.vst4.p0i8.v1i64 with pointer alignment 4, after the
// uint64x1x4_t argument (coerced to [4 x i64]) is spilled, memcpy'd to a
// local, and each <1 x i64> lane is reloaded via a <8 x i8> round-trip.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18551 // CHECK-LABEL: @test_vst4_u64(
18552 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
18553 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
18554 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
18555 // CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
18556 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18557 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
18558 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
18559 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18560 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
18561 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18562 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
18563 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
18564 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
18565 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18566 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
18567 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
18568 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
18569 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18570 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
18571 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
18572 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
18573 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18574 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
18575 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
18576 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
18577 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
18578 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
18579 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
18580 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
18581 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
18582 // CHECK: ret void
test_vst4_u64(uint64_t * a,uint64x1x4_t b)18583 void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
18584 vst4_u64(a, b);
18585 }
18586
// Auto-generated FileCheck test: vst4_s8 must lower to
// @llvm.arm.neon.vst4.p0i8.v8i8 with alignment 1. Because both the pointee
// and the vector lanes are already i8, no pointer or lane bitcasts appear
// (contrast with the 16/32/64-bit element variants below).
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18587 // CHECK-LABEL: @test_vst4_s8(
18588 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
18589 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
18590 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
18591 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
18592 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18593 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
18594 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
18595 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18596 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18597 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
18598 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18599 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18600 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18601 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18602 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18603 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18604 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18605 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18606 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
18607 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
18608 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
18609 // CHECK: ret void
test_vst4_s8(int8_t * a,int8x8x4_t b)18610 void test_vst4_s8(int8_t * a, int8x8x4_t b) {
18611 vst4_s8(a, b);
18612 }
18613
// Auto-generated FileCheck test: vst4_s16 must lower to
// @llvm.arm.neon.vst4.p0i8.v4i16 with alignment 2; each <4 x i16> lane is
// round-tripped through <8 x i8> and the i16* destination is bitcast to i8*.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18614 // CHECK-LABEL: @test_vst4_s16(
18615 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
18616 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
18617 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
18618 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
18619 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18620 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
18621 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
18622 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18623 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18624 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18625 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
18626 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18627 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18628 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18629 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18630 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18631 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18632 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18633 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18634 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18635 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18636 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18637 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
18638 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
18639 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18640 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18641 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18642 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18643 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18644 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
18645 // CHECK: ret void
test_vst4_s16(int16_t * a,int16x4x4_t b)18646 void test_vst4_s16(int16_t * a, int16x4x4_t b) {
18647 vst4_s16(a, b);
18648 }
18649
// Auto-generated FileCheck test: vst4_s32 must lower to
// @llvm.arm.neon.vst4.p0i8.v2i32 with alignment 4; each <2 x i32> lane is
// round-tripped through <8 x i8> and the i32* destination is bitcast to i8*.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18650 // CHECK-LABEL: @test_vst4_s32(
18651 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
18652 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
18653 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
18654 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
18655 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18656 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
18657 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
18658 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18659 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18660 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18661 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
18662 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
18663 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
18664 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18665 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
18666 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
18667 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
18668 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18669 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
18670 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
18671 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
18672 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18673 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
18674 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
18675 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
18676 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
18677 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
18678 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
18679 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
18680 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
18681 // CHECK: ret void
test_vst4_s32(int32_t * a,int32x2x4_t b)18682 void test_vst4_s32(int32_t * a, int32x2x4_t b) {
18683 vst4_s32(a, b);
18684 }
18685
// Auto-generated FileCheck test: vst4_s64 must lower to
// @llvm.arm.neon.vst4.p0i8.v1i64 with alignment 4 — identical IR shape to
// the unsigned test_vst4_u64 above, differing only in struct type names.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18686 // CHECK-LABEL: @test_vst4_s64(
18687 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
18688 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
18689 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
18690 // CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
18691 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18692 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
18693 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
18694 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18695 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
18696 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18697 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
18698 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
18699 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
18700 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18701 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
18702 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
18703 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
18704 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18705 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
18706 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
18707 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
18708 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18709 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
18710 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
18711 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
18712 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
18713 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
18714 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
18715 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
18716 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
18717 // CHECK: ret void
test_vst4_s64(int64_t * a,int64x1x4_t b)18718 void test_vst4_s64(int64_t * a, int64x1x4_t b) {
18719 vst4_s64(a, b);
18720 }
18721
// Auto-generated FileCheck test: vst4_f16 must lower to
// @llvm.arm.neon.vst4.p0i8.v4f16 with alignment 2; half-precision lanes
// (enabled by +fullfp16 in the RUN line) round-trip through <8 x i8>.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18722 // CHECK-LABEL: @test_vst4_f16(
18723 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
18724 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
18725 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
18726 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
18727 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18728 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
18729 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
18730 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18731 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
18732 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18733 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
18734 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
18735 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
18736 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18737 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
18738 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
18739 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
18740 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18741 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
18742 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
18743 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
18744 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18745 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
18746 // CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
18747 // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
18748 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
18749 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
18750 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
18751 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
18752 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 2)
18753 // CHECK: ret void
test_vst4_f16(float16_t * a,float16x4x4_t b)18754 void test_vst4_f16(float16_t * a, float16x4x4_t b) {
18755 vst4_f16(a, b);
18756 }
18757
// Auto-generated FileCheck test: vst4_f32 must lower to
// @llvm.arm.neon.vst4.p0i8.v2f32 with alignment 4; <2 x float> lanes
// round-trip through <8 x i8> and float* is bitcast to i8*.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18758 // CHECK-LABEL: @test_vst4_f32(
18759 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
18760 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
18761 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
18762 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
18763 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18764 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
18765 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
18766 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18767 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
18768 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18769 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
18770 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
18771 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
18772 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18773 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
18774 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
18775 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
18776 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18777 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
18778 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
18779 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
18780 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18781 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
18782 // CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
18783 // CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
18784 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
18785 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
18786 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
18787 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
18788 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
18789 // CHECK: ret void
test_vst4_f32(float32_t * a,float32x2x4_t b)18790 void test_vst4_f32(float32_t * a, float32x2x4_t b) {
18791 vst4_f32(a, b);
18792 }
18793
// Auto-generated FileCheck test: vst4_p8 (polynomial 8-bit) must lower to
// @llvm.arm.neon.vst4.p0i8.v8i8 with alignment 1 — same IR shape as
// test_vst4_s8 above: no pointer or lane bitcasts are needed for i8.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18794 // CHECK-LABEL: @test_vst4_p8(
18795 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
18796 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
18797 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
18798 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
18799 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18800 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
18801 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
18802 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18803 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18804 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
18805 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18806 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18807 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18808 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18809 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18810 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18811 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18812 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18813 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
18814 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
18815 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
18816 // CHECK: ret void
test_vst4_p8(poly8_t * a,poly8x8x4_t b)18817 void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
18818 vst4_p8(a, b);
18819 }
18820
// Auto-generated FileCheck test: vst4_p16 (polynomial 16-bit) must lower to
// @llvm.arm.neon.vst4.p0i8.v4i16 with alignment 2 — same IR shape as
// test_vst4_s16 above, differing only in the poly16x4x4_t struct type.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18821 // CHECK-LABEL: @test_vst4_p16(
18822 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
18823 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
18824 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
18825 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
18826 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18827 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
18828 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
18829 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18830 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18831 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18832 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
18833 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18834 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18835 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18836 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18837 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18838 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18839 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18840 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18841 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18842 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18843 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18844 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
18845 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
18846 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18847 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18848 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18849 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18850 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18851 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
18852 // CHECK: ret void
test_vst4_p16(poly16_t * a,poly16x4x4_t b)18853 void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
18854 vst4_p16(a, b);
18855 }
18856
// Auto-generated FileCheck test: vst4q_lane_u16 must lower to
// @llvm.arm.neon.vst4lane.p0i8.v8i16 with lane index 7 and alignment 2.
// Q-register variant: the struct is 64 bytes ([8 x i64] coercion, align 16)
// and lanes round-trip through <16 x i8> instead of <8 x i8>.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18857 // CHECK-LABEL: @test_vst4q_lane_u16(
18858 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
18859 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
18860 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
18861 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18862 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18863 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
18864 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
18865 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18866 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18867 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18868 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18869 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18870 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18871 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18872 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18873 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18874 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18875 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18876 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18877 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18878 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18879 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18880 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18881 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18882 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18883 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18884 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18885 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18886 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18887 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
18888 // CHECK: ret void
test_vst4q_lane_u16(uint16_t * a,uint16x8x4_t b)18889 void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
18890 vst4q_lane_u16(a, b, 7);
18891 }
18892
// Auto-generated FileCheck test: vst4q_lane_u32 must lower to
// @llvm.arm.neon.vst4lane.p0i8.v4i32 with lane index 3 and alignment 4.
// Q-register variant: 64-byte struct ([8 x i64] coercion, align 16),
// <4 x i32> lanes round-trip through <16 x i8>.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18893 // CHECK-LABEL: @test_vst4q_lane_u32(
18894 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
18895 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
18896 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
18897 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
18898 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18899 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
18900 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
18901 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18902 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18903 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18904 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
18905 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
18906 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
18907 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18908 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
18909 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
18910 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
18911 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18912 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
18913 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
18914 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
18915 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18916 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
18917 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
18918 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
18919 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
18920 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
18921 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
18922 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
18923 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
18924 // CHECK: ret void
test_vst4q_lane_u32(uint32_t * a,uint32x4x4_t b)18925 void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
18926 vst4q_lane_u32(a, b, 3);
18927 }
18928
// Auto-generated FileCheck test: vst4q_lane_s16 must lower to
// @llvm.arm.neon.vst4lane.p0i8.v8i16 with lane index 7 and alignment 2 —
// same IR shape as the unsigned test_vst4q_lane_u16 above, differing only
// in the int16x8x4_t struct type.
// NOTE(review): CHECK lines mirror exact -O0 + mem2reg IR — do not hand-edit.
18929 // CHECK-LABEL: @test_vst4q_lane_s16(
18930 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
18931 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
18932 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
18933 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18934 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18935 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
18936 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
18937 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18938 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18939 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18940 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18941 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18942 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18943 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18944 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18945 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18946 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18947 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18948 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18949 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18950 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18951 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18952 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18953 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18954 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18955 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18956 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18957 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18958 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18959 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
18960 // CHECK: ret void
test_vst4q_lane_s16(int16_t * a,int16x8x4_t b)18961 void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
18962 vst4q_lane_s16(a, b, 7);
18963 }
18964
// vst4q_lane_s32: int32x4x4_t coerced as [8 x i64], memcpy'd to a shadow
// struct, four <4 x i32> members loaded and bitcast via <16 x i8>, then stored
// with @llvm.arm.neon.vst4lane.p0i8.v4i32 (lane index 3).
// CHECK-LABEL: @test_vst4q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
  vst4q_lane_s32(a, b, 3);
}
19000
// vst4q_lane_f16: float16x8x4_t coerced as [8 x i64], memcpy'd to a shadow
// struct, four <8 x half> members loaded and bitcast via <16 x i8>, then
// stored with @llvm.arm.neon.vst4lane.p0i8.v8f16 (lane index 7).
// CHECK-LABEL: @test_vst4q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
  vst4q_lane_f16(a, b, 7);
}
19036
// vst4q_lane_f32: float32x4x4_t coerced as [8 x i64], memcpy'd to a shadow
// struct, four <4 x float> members loaded and bitcast via <16 x i8>, then
// stored with @llvm.arm.neon.vst4lane.p0i8.v4f32 (lane index 3).
// CHECK-LABEL: @test_vst4q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
  vst4q_lane_f32(a, b, 3);
}
19072
// vst4q_lane_p16: poly16x8x4_t coerced as [8 x i64], memcpy'd to a shadow
// struct, four <8 x i16> members loaded and bitcast via <16 x i8>, then
// stored with @llvm.arm.neon.vst4lane.p0i8.v8i16 (lane index 7) -- the same
// IR shape as the s16/u16 variants since polynomial types lower to i16 vectors.
// CHECK-LABEL: @test_vst4q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_lane_p16(a, b, 7);
}
19108
// vst4_lane_u8 (64-bit D-register variant): uint8x8x4_t coerced as [4 x i64],
// memcpy'd to a shadow struct, four <8 x i8> members loaded directly (no
// bitcasts needed -- element type already matches the i8* intrinsic pointer),
// then stored with @llvm.arm.neon.vst4lane.p0i8.v8i8 (lane index 7).
// CHECK-LABEL: @test_vst4_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: ret void
void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_lane_u8(a, b, 7);
}
19135
// vst4_lane_u16 (64-bit variant): uint16x4x4_t coerced as [4 x i64], memcpy'd
// to a shadow struct, four <4 x i16> members loaded and bitcast via <8 x i8>,
// then stored with @llvm.arm.neon.vst4lane.p0i8.v4i16 (lane index 3).
// CHECK-LABEL: @test_vst4_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK: ret void
void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_lane_u16(a, b, 3);
}
19171
// vst4_lane_u32 (64-bit variant): uint32x2x4_t coerced as [4 x i64], memcpy'd
// to a shadow struct, four <2 x i32> members loaded and bitcast via <8 x i8>,
// then stored with @llvm.arm.neon.vst4lane.p0i8.v2i32 (lane index 1).
// CHECK-LABEL: @test_vst4_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
// CHECK: ret void
void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
  vst4_lane_u32(a, b, 1);
}
19207
// vst4_lane_s8 (64-bit variant): int8x8x4_t coerced as [4 x i64], memcpy'd to
// a shadow struct, four <8 x i8> members loaded directly (no bitcasts -- i8
// element type already matches), then stored with
// @llvm.arm.neon.vst4lane.p0i8.v8i8 (lane index 7).
// CHECK-LABEL: @test_vst4_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: ret void
void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
  vst4_lane_s8(a, b, 7);
}
19234
// vst4_lane_s16 (64-bit variant): int16x4x4_t coerced as [4 x i64], memcpy'd
// to a shadow struct, four <4 x i16> members loaded and bitcast via <8 x i8>,
// then stored with @llvm.arm.neon.vst4lane.p0i8.v4i16 (lane index 3).
// CHECK-LABEL: @test_vst4_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK: ret void
void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) {
  vst4_lane_s16(a, b, 3);
}
19270
// vst4_lane_s32 (64-bit variant): int32x2x4_t coerced as [4 x i64], memcpy'd
// to a shadow struct, four <2 x i32> members loaded and bitcast via <8 x i8>,
// then stored with @llvm.arm.neon.vst4lane.p0i8.v2i32 (lane index 1).
// CHECK-LABEL: @test_vst4_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
// CHECK: ret void
void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
  vst4_lane_s32(a, b, 1);
}
19306
19307 // CHECK-LABEL: @test_vst4_lane_f16(
19308 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
19309 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
19310 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
19311 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
19312 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19313 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
19314 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
19315 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19316 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
19317 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19318 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
19319 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
19320 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
19321 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19322 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
19323 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
19324 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
19325 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19326 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
19327 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
19328 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
19329 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19330 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
19331 // CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
19332 // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
19333 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
19334 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
19335 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
19336 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
19337 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 3, i32 2)
19338 // CHECK: ret void
// Verifies vst4_lane_f16 stores lane 3 of all four <4 x half> fields via
// @llvm.arm.neon.vst4lane with 2-byte alignment (see CHECK lines above).
void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
  vst4_lane_f16(a, b, 3);
}
19342
19343 // CHECK-LABEL: @test_vst4_lane_f32(
19344 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
19345 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
19346 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
19347 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
19348 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19349 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
19350 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
19351 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19352 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
19353 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19354 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
19355 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
19356 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
19357 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19358 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
19359 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
19360 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
19361 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19362 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
19363 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
19364 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
19365 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19366 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
19367 // CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
19368 // CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
19369 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
19370 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
19371 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
19372 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
19373 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4)
19374 // CHECK: ret void
// Verifies vst4_lane_f32 stores lane 1 of all four <2 x float> fields via
// @llvm.arm.neon.vst4lane with 4-byte alignment (see CHECK lines above).
void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) {
  vst4_lane_f32(a, b, 1);
}
19378
19379 // CHECK-LABEL: @test_vst4_lane_p8(
19380 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
19381 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
19382 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
19383 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19384 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19385 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
19386 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
19387 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19388 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19389 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
19390 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19391 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19392 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19393 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19394 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19395 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19396 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19397 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19398 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
19399 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
19400 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
19401 // CHECK: ret void
// Verifies vst4_lane_p8 stores lane 7 of all four <8 x i8> fields directly
// (no bitcasts needed for i8 elements) via @llvm.arm.neon.vst4lane.
void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_lane_p8(a, b, 7);
}
19405
19406 // CHECK-LABEL: @test_vst4_lane_p16(
19407 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
19408 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
19409 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
19410 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
19411 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19412 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
19413 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
19414 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19415 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19416 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19417 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
19418 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19419 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19420 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19421 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19422 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19423 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19424 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19425 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19426 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19427 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19428 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19429 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
19430 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
19431 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
19432 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19433 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19434 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19435 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
19436 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
19437 // CHECK: ret void
// Verifies vst4_lane_p16 stores lane 3 of all four <4 x i16> fields via
// @llvm.arm.neon.vst4lane with 2-byte alignment (see CHECK lines above).
void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_lane_p16(a, b, 3);
}
19441
19442 // CHECK-LABEL: @test_vsub_s8(
19443 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
19444 // CHECK: ret <8 x i8> [[SUB_I]]
// vsub_s8 must lower to a plain <8 x i8> integer sub, no intrinsic call.
int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) {
  return vsub_s8(a, b);
}
19448
19449 // CHECK-LABEL: @test_vsub_s16(
19450 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
19451 // CHECK: ret <4 x i16> [[SUB_I]]
// vsub_s16 must lower to a plain <4 x i16> integer sub.
int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) {
  return vsub_s16(a, b);
}
19455
19456 // CHECK-LABEL: @test_vsub_s32(
19457 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
19458 // CHECK: ret <2 x i32> [[SUB_I]]
// vsub_s32 must lower to a plain <2 x i32> integer sub.
int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) {
  return vsub_s32(a, b);
}
19462
19463 // CHECK-LABEL: @test_vsub_s64(
19464 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
19465 // CHECK: ret <1 x i64> [[SUB_I]]
// vsub_s64 must lower to a plain <1 x i64> integer sub.
int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) {
  return vsub_s64(a, b);
}
19469
19470 // CHECK-LABEL: @test_vsub_f32(
19471 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b
19472 // CHECK: ret <2 x float> [[SUB_I]]
// vsub_f32 must lower to a <2 x float> fsub (floating-point subtract).
float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) {
  return vsub_f32(a, b);
}
19476
19477 // CHECK-LABEL: @test_vsub_u8(
19478 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
19479 // CHECK: ret <8 x i8> [[SUB_I]]
// Unsigned variant lowers to the same <8 x i8> sub as the signed one.
uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) {
  return vsub_u8(a, b);
}
19483
19484 // CHECK-LABEL: @test_vsub_u16(
19485 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
19486 // CHECK: ret <4 x i16> [[SUB_I]]
// Unsigned variant lowers to the same <4 x i16> sub as the signed one.
uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) {
  return vsub_u16(a, b);
}
19490
19491 // CHECK-LABEL: @test_vsub_u32(
19492 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
19493 // CHECK: ret <2 x i32> [[SUB_I]]
// Unsigned variant lowers to the same <2 x i32> sub as the signed one.
uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) {
  return vsub_u32(a, b);
}
19497
19498 // CHECK-LABEL: @test_vsub_u64(
19499 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
19500 // CHECK: ret <1 x i64> [[SUB_I]]
// Unsigned variant lowers to the same <1 x i64> sub as the signed one.
uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) {
  return vsub_u64(a, b);
}
19504
19505 // CHECK-LABEL: @test_vsubq_s8(
19506 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
19507 // CHECK: ret <16 x i8> [[SUB_I]]
// 128-bit q-form: vsubq_s8 lowers to a <16 x i8> integer sub.
int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) {
  return vsubq_s8(a, b);
}
19511
19512 // CHECK-LABEL: @test_vsubq_s16(
19513 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
19514 // CHECK: ret <8 x i16> [[SUB_I]]
// 128-bit q-form: vsubq_s16 lowers to an <8 x i16> integer sub.
int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) {
  return vsubq_s16(a, b);
}
19518
19519 // CHECK-LABEL: @test_vsubq_s32(
19520 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
19521 // CHECK: ret <4 x i32> [[SUB_I]]
// 128-bit q-form: vsubq_s32 lowers to a <4 x i32> integer sub.
int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) {
  return vsubq_s32(a, b);
}
19525
19526 // CHECK-LABEL: @test_vsubq_s64(
19527 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
19528 // CHECK: ret <2 x i64> [[SUB_I]]
// 128-bit q-form: vsubq_s64 lowers to a <2 x i64> integer sub.
int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) {
  return vsubq_s64(a, b);
}
19532
19533 // CHECK-LABEL: @test_vsubq_f32(
19534 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b
19535 // CHECK: ret <4 x float> [[SUB_I]]
// 128-bit q-form: vsubq_f32 lowers to a <4 x float> fsub.
float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) {
  return vsubq_f32(a, b);
}
19539
19540 // CHECK-LABEL: @test_vsubq_u8(
19541 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
19542 // CHECK: ret <16 x i8> [[SUB_I]]
// Unsigned q-form lowers to the same <16 x i8> sub as the signed one.
uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vsubq_u8(a, b);
}
19546
19547 // CHECK-LABEL: @test_vsubq_u16(
19548 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
19549 // CHECK: ret <8 x i16> [[SUB_I]]
// Unsigned q-form lowers to the same <8 x i16> sub as the signed one.
uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vsubq_u16(a, b);
}
19553
19554 // CHECK-LABEL: @test_vsubq_u32(
19555 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
19556 // CHECK: ret <4 x i32> [[SUB_I]]
// Unsigned q-form lowers to the same <4 x i32> sub as the signed one.
uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vsubq_u32(a, b);
}
19560
19561 // CHECK-LABEL: @test_vsubq_u64(
19562 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
19563 // CHECK: ret <2 x i64> [[SUB_I]]
// Unsigned q-form lowers to the same <2 x i64> sub as the signed one.
uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vsubq_u64(a, b);
}
19567
19568 // CHECK-LABEL: @test_vsubhn_s16(
19569 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
19570 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
19571 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
19572 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
19573 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
19574 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
// vsubhn_s16 (subtract, take high halves): lowers to sub + lshr by 8 + trunc,
// narrowing <8 x i16> to <8 x i8> — no intrinsic call.
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}
19578
19579 // CHECK-LABEL: @test_vsubhn_s32(
19580 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
19581 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
19582 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
19583 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
19584 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
19585 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
// vsubhn_s32: sub + lshr by 16 + trunc, narrowing <4 x i32> to <4 x i16>.
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}
19589
19590 // CHECK-LABEL: @test_vsubhn_s64(
19591 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
19592 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
19593 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
19594 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
19595 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
19596 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
// vsubhn_s64: sub + lshr by 32 + trunc, narrowing <2 x i64> to <2 x i32>.
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}
19600
19601 // CHECK-LABEL: @test_vsubhn_u16(
19602 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
19603 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
19604 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
19605 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
19606 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
19607 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
// Unsigned high-half narrow: identical lowering to the signed s16 variant.
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}
19611
19612 // CHECK-LABEL: @test_vsubhn_u32(
19613 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
19614 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
19615 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
19616 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
19617 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
19618 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
// Unsigned high-half narrow: identical lowering to the signed s32 variant.
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}
19622
19623 // CHECK-LABEL: @test_vsubhn_u64(
19624 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
19625 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
19626 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
19627 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
19628 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
19629 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
// Unsigned high-half narrow: identical lowering to the signed s64 variant.
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}
19633
19634 // CHECK-LABEL: @test_vsubl_s8(
19635 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
19636 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
19637 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19638 // CHECK: ret <8 x i16> [[SUB_I]]
// vsubl_s8 (long subtract): sign-extend both <8 x i8> operands to <8 x i16>,
// then subtract in the wider type.
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}
19642
19643 // CHECK-LABEL: @test_vsubl_s16(
19644 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
19645 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
19646 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19647 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
19648 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19649 // CHECK: ret <4 x i32> [[SUB_I]]
// vsubl_s16: sign-extend both operands to <4 x i32>, then subtract.
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}
19653
19654 // CHECK-LABEL: @test_vsubl_s32(
19655 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
19656 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
19657 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19658 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
19659 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19660 // CHECK: ret <2 x i64> [[SUB_I]]
// vsubl_s32: sign-extend both operands to <2 x i64>, then subtract.
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}
19664
19665 // CHECK-LABEL: @test_vsubl_u8(
19666 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
19667 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
19668 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19669 // CHECK: ret <8 x i16> [[SUB_I]]
// Unsigned long subtract: zero-extend both operands to <8 x i16>, then sub.
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}
19673
19674 // CHECK-LABEL: @test_vsubl_u16(
19675 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
19676 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
19677 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19678 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
19679 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19680 // CHECK: ret <4 x i32> [[SUB_I]]
// Unsigned long subtract: zero-extend both operands to <4 x i32>, then sub.
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}
19684
19685 // CHECK-LABEL: @test_vsubl_u32(
19686 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
19687 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
19688 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19689 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
19690 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19691 // CHECK: ret <2 x i64> [[SUB_I]]
// Unsigned long subtract: zero-extend both operands to <2 x i64>, then sub.
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}
19695
19696 // CHECK-LABEL: @test_vsubw_s8(
19697 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
19698 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
19699 // CHECK: ret <8 x i16> [[SUB_I]]
// vsubw_s8 (wide subtract): sign-extend only the narrow operand b to
// <8 x i16>, then subtract it from the already-wide a.
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}
19703
19704 // CHECK-LABEL: @test_vsubw_s16(
19705 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19706 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
19707 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
19708 // CHECK: ret <4 x i32> [[SUB_I]]
// vsubw_s16: sign-extend b to <4 x i32>, then subtract from wide a.
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}
19712
19713 // CHECK-LABEL: @test_vsubw_s32(
19714 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19715 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
19716 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
19717 // CHECK: ret <2 x i64> [[SUB_I]]
// vsubw_s32: sign-extend b to <2 x i64>, then subtract from wide a.
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}
19721
19722 // CHECK-LABEL: @test_vsubw_u8(
19723 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
19724 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
19725 // CHECK: ret <8 x i16> [[SUB_I]]
// Unsigned wide subtract: zero-extend b to <8 x i16>, then sub from a.
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}
19729
19730 // CHECK-LABEL: @test_vsubw_u16(
19731 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19732 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
19733 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
19734 // CHECK: ret <4 x i32> [[SUB_I]]
// Unsigned wide subtract: zero-extend b to <4 x i32>, then sub from a.
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}
19738
19739 // CHECK-LABEL: @test_vsubw_u32(
19740 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19741 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
19742 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
19743 // CHECK: ret <2 x i64> [[SUB_I]]
// Unsigned wide subtract: zero-extend b to <2 x i64>, then sub from a.
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}
19747
19748 // CHECK-LABEL: @test_vtbl1_u8(
19749 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
19750 // CHECK: ret <8 x i8> [[VTBL1_I]]
// Single-register table lookup: lowers to the @llvm.arm.neon.vtbl1 intrinsic.
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
  return vtbl1_u8(a, b);
}
19754
19755 // CHECK-LABEL: @test_vtbl1_s8(
19756 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
19757 // CHECK: ret <8 x i8> [[VTBL1_I]]
// Signed variant: same @llvm.arm.neon.vtbl1 lowering as the unsigned one.
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
  return vtbl1_s8(a, b);
}
19761
19762 // CHECK-LABEL: @test_vtbl1_p8(
19763 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
19764 // CHECK: ret <8 x i8> [[VTBL1_I]]
// Polynomial variant (note the uint8x8_t index vector): same vtbl1 lowering.
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
  return vtbl1_p8(a, b);
}
19768
19769 // CHECK-LABEL: @test_vtbl2_u8(
19770 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
19771 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
19772 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
19773 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19774 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
19775 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
19776 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
19777 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
19778 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
19779 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
19780 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
19781 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
19782 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19783 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19784 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
19785 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19786 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19787 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
19788 // CHECK: ret <8 x i8> [[VTBL2_I]]
// Two-register table lookup: the uint8x8x2_t table is passed via the APCS
// [2 x i64] coercion, unpacked, and fed to @llvm.arm.neon.vtbl2.
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  return vtbl2_u8(a, b);
}
19792
19793 // CHECK-LABEL: @test_vtbl2_s8(
19794 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
19795 // CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
19796 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
19797 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19798 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
19799 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
19800 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
19801 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
19802 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
19803 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
19804 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
19805 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
19806 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19807 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19808 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
19809 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19810 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19811 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
19812 // CHECK: ret <8 x i8> [[VTBL2_I]]
// Signed two-register table lookup: same vtbl2 lowering as the unsigned one.
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  return vtbl2_s8(a, b);
}
19816
19817 // CHECK-LABEL: @test_vtbl2_p8(
19818 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
19819 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
19820 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
19821 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19822 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
19823 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
19824 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
19825 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
19826 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
19827 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
19828 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
19829 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
19830 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19831 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19832 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
19833 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19834 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19835 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
19836 // CHECK: ret <8 x i8> [[VTBL2_I]]
// Polynomial two-register table lookup: same vtbl2 lowering.
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  return vtbl2_p8(a, b);
}
19840
19841 // CHECK-LABEL: @test_vtbl3_u8(
19842 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
19843 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
19844 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
19845 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19846 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
19847 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
19848 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
19849 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
19850 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19851 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
19852 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
19853 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19854 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19855 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19856 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19857 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19858 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19859 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19860 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19861 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19862 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
19863 // CHECK: ret <8 x i8> [[VTBL3_I]]
// Three-register table lookup: the uint8x8x3_t table is passed via a
// [3 x i64] coercion, unpacked, and fed to @llvm.arm.neon.vtbl3.
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  return vtbl3_u8(a, b);
}
19867
19868 // CHECK-LABEL: @test_vtbl3_s8(
19869 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
19870 // CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
19871 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
19872 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19873 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
19874 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
19875 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
19876 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
19877 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19878 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
19879 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
19880 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19881 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19882 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19883 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19884 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19885 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19886 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19887 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19888 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19889 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
19890 // CHECK: ret <8 x i8> [[VTBL3_I]]
// Verifies vtbl3_s8 lowers to a single @llvm.arm.neon.vtbl3 call; the CHECK
// lines above also pin the [3 x i64] coercion of the int8x8x3_t argument.
test_vtbl3_s8(int8x8x3_t a,int8x8_t b)19891 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
19892   return vtbl3_s8(a, b);
19893 }
19894
19895 // CHECK-LABEL: @test_vtbl3_p8(
19896 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
19897 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
19898 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
19899 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19900 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
19901 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
19902 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
19903 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
19904 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19905 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
19906 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
19907 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19908 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19909 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19910 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19911 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19912 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19913 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19914 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19915 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19916 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
19917 // CHECK: ret <8 x i8> [[VTBL3_I]]
// Verifies vtbl3_p8 lowers to a single @llvm.arm.neon.vtbl3 call; the CHECK
// lines above also pin the [3 x i64] coercion of the poly8x8x3_t argument.
test_vtbl3_p8(poly8x8x3_t a,uint8x8_t b)19918 poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
19919   return vtbl3_p8(a, b);
19920 }
19921
19922 // CHECK-LABEL: @test_vtbl4_u8(
19923 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
19924 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
19925 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
19926 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19927 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
19928 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
19929 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
19930 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
19931 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19932 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
19933 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
19934 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19935 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19936 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19937 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19938 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19939 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19940 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19941 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19942 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19943 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19944 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
19945 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
19946 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
19947 // CHECK: ret <8 x i8> [[VTBL4_I]]
// Verifies vtbl4_u8 lowers to a single @llvm.arm.neon.vtbl4 call; the CHECK
// lines above also pin the [4 x i64] coercion of the uint8x8x4_t argument.
test_vtbl4_u8(uint8x8x4_t a,uint8x8_t b)19948 uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
19949   return vtbl4_u8(a, b);
19950 }
19951
19952 // CHECK-LABEL: @test_vtbl4_s8(
19953 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
19954 // CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
19955 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
19956 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19957 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
19958 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
19959 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
19960 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
19961 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19962 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
19963 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
19964 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19965 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19966 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19967 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19968 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19969 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19970 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19971 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19972 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19973 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19974 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
19975 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
19976 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
19977 // CHECK: ret <8 x i8> [[VTBL4_I]]
// Verifies vtbl4_s8 lowers to a single @llvm.arm.neon.vtbl4 call; the CHECK
// lines above also pin the [4 x i64] coercion of the int8x8x4_t argument.
test_vtbl4_s8(int8x8x4_t a,int8x8_t b)19978 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
19979   return vtbl4_s8(a, b);
19980 }
19981
19982 // CHECK-LABEL: @test_vtbl4_p8(
19983 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
19984 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
19985 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
19986 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19987 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
19988 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
19989 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
19990 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
19991 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19992 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
19993 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
19994 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19995 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19996 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19997 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19998 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19999 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20000 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
20001 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20002 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20003 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
20004 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20005 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20006 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
20007 // CHECK: ret <8 x i8> [[VTBL4_I]]
// Verifies vtbl4_p8 lowers to a single @llvm.arm.neon.vtbl4 call; the CHECK
// lines above also pin the [4 x i64] coercion of the poly8x8x4_t argument.
test_vtbl4_p8(poly8x8x4_t a,uint8x8_t b)20008 poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
20009   return vtbl4_p8(a, b);
20010 }
20011
20012 // CHECK-LABEL: @test_vtbx1_u8(
20013 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
20014 // CHECK: ret <8 x i8> [[VTBX1_I]]
// Verifies vtbx1_u8 lowers directly to @llvm.arm.neon.vtbx1 on the three
// vector arguments (no struct coercion involved — see CHECK lines above).
test_vtbx1_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)20015 uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
20016   return vtbx1_u8(a, b, c);
20017 }
20018
20019 // CHECK-LABEL: @test_vtbx1_s8(
20020 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
20021 // CHECK: ret <8 x i8> [[VTBX1_I]]
// Verifies vtbx1_s8 lowers directly to @llvm.arm.neon.vtbx1 on the three
// vector arguments (no struct coercion involved — see CHECK lines above).
test_vtbx1_s8(int8x8_t a,int8x8_t b,int8x8_t c)20022 int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
20023   return vtbx1_s8(a, b, c);
20024 }
20025
20026 // CHECK-LABEL: @test_vtbx1_p8(
20027 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
20028 // CHECK: ret <8 x i8> [[VTBX1_I]]
// Verifies vtbx1_p8 lowers directly to @llvm.arm.neon.vtbx1 on the three
// vector arguments (no struct coercion involved — see CHECK lines above).
test_vtbx1_p8(poly8x8_t a,poly8x8_t b,uint8x8_t c)20029 poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
20030   return vtbx1_p8(a, b, c);
20031 }
20032
20033 // CHECK-LABEL: @test_vtbx2_u8(
20034 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
20035 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
20036 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
20037 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20038 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20039 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
20040 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20041 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20042 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
20043 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20044 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20045 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
20046 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20047 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20048 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
20049 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20050 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20051 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20052 // CHECK: ret <8 x i8> [[VTBX2_I]]
// Verifies vtbx2_u8 lowers to @llvm.arm.neon.vtbx2; the CHECK lines above
// also pin the [2 x i64] coercion of the uint8x8x2_t table argument.
test_vtbx2_u8(uint8x8_t a,uint8x8x2_t b,uint8x8_t c)20053 uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
20054   return vtbx2_u8(a, b, c);
20055 }
20056
20057 // CHECK-LABEL: @test_vtbx2_s8(
20058 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
20059 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
20060 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
20061 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20062 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20063 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
20064 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20065 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20066 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20067 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20068 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20069 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20070 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20071 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20072 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20073 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20074 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20075 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20076 // CHECK: ret <8 x i8> [[VTBX2_I]]
// Verifies vtbx2_s8 lowers to @llvm.arm.neon.vtbx2; the CHECK lines above
// also pin the [2 x i64] coercion of the int8x8x2_t table argument.
test_vtbx2_s8(int8x8_t a,int8x8x2_t b,int8x8_t c)20077 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
20078   return vtbx2_s8(a, b, c);
20079 }
20080
20081 // CHECK-LABEL: @test_vtbx2_p8(
20082 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
20083 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
20084 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
20085 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20086 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20087 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
20088 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20089 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20090 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20091 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20092 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20093 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20094 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20095 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20096 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20097 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20098 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20099 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20100 // CHECK: ret <8 x i8> [[VTBX2_I]]
// Verifies vtbx2_p8 lowers to @llvm.arm.neon.vtbx2; the CHECK lines above
// also pin the [2 x i64] coercion of the poly8x8x2_t table argument.
test_vtbx2_p8(poly8x8_t a,poly8x8x2_t b,uint8x8_t c)20101 poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
20102   return vtbx2_p8(a, b, c);
20103 }
20104
20105 // CHECK-LABEL: @test_vtbx3_u8(
20106 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
20107 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
20108 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
20109 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20110 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20111 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
20112 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20113 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20114 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20115 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20116 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20117 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20118 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20119 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20120 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20121 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20122 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20123 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20124 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20125 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20126 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20127 // CHECK: ret <8 x i8> [[VTBX3_I]]
// Verifies vtbx3_u8 lowers to @llvm.arm.neon.vtbx3; the CHECK lines above
// also pin the [3 x i64] coercion of the uint8x8x3_t table argument.
test_vtbx3_u8(uint8x8_t a,uint8x8x3_t b,uint8x8_t c)20128 uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
20129   return vtbx3_u8(a, b, c);
20130 }
20131
20132 // CHECK-LABEL: @test_vtbx3_s8(
20133 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
20134 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
20135 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
20136 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20137 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20138 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
20139 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20140 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20141 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20142 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20143 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20144 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20145 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20146 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20147 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20148 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20149 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20150 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20151 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20152 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20153 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20154 // CHECK: ret <8 x i8> [[VTBX3_I]]
// Verifies vtbx3_s8 lowers to @llvm.arm.neon.vtbx3; the CHECK lines above
// also pin the [3 x i64] coercion of the int8x8x3_t table argument.
test_vtbx3_s8(int8x8_t a,int8x8x3_t b,int8x8_t c)20155 int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
20156   return vtbx3_s8(a, b, c);
20157 }
20158
20159 // CHECK-LABEL: @test_vtbx3_p8(
20160 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
20161 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
20162 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
20163 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20164 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20165 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
20166 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20167 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20168 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20169 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20170 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20171 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20172 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20173 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20174 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20175 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20176 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20177 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20178 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20179 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20180 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20181 // CHECK: ret <8 x i8> [[VTBX3_I]]
// Verifies vtbx3_p8 lowers to @llvm.arm.neon.vtbx3; the CHECK lines above
// also pin the [3 x i64] coercion of the poly8x8x3_t table argument.
test_vtbx3_p8(poly8x8_t a,poly8x8x3_t b,uint8x8_t c)20182 poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
20183   return vtbx3_p8(a, b, c);
20184 }
20185
20186 // CHECK-LABEL: @test_vtbx4_u8(
20187 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
20188 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
20189 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
20190 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20191 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20192 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
20193 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20194 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20195 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20196 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20197 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20198 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20199 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20200 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20201 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20202 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20203 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20204 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20205 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20206 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20207 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20208 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20209 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20210 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20211 // CHECK: ret <8 x i8> [[VTBX4_I]]
// Verifies vtbx4_u8 lowers to @llvm.arm.neon.vtbx4; the CHECK lines above
// also pin the [4 x i64] coercion of the uint8x8x4_t table argument.
test_vtbx4_u8(uint8x8_t a,uint8x8x4_t b,uint8x8_t c)20212 uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
20213   return vtbx4_u8(a, b, c);
20214 }
20215
20216 // CHECK-LABEL: @test_vtbx4_s8(
20217 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
20218 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
20219 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
20220 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20221 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20222 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
20223 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20224 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20225 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20226 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20227 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20228 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20229 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20230 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20231 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20232 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20233 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20234 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20235 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20236 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20237 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20238 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20239 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20240 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20241 // CHECK: ret <8 x i8> [[VTBX4_I]]
// Verifies vtbx4_s8 lowers to @llvm.arm.neon.vtbx4; the CHECK lines above
// also pin the [4 x i64] coercion of the int8x8x4_t table argument.
test_vtbx4_s8(int8x8_t a,int8x8x4_t b,int8x8_t c)20242 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
20243   return vtbx4_s8(a, b, c);
20244 }
20245
// vtbx4_p8: the 4-vector table is passed coerced as [4 x i64], copied into a
// local struct, each <8 x i8> lane reloaded, then lowered to one
// @llvm.arm.neon.vtbx4 call.
// CHECK-LABEL: @test_vtbx4_p8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX4_I]]
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
  return vtbx4_p8(a, b, c);
}
20275
// vtrn_s8: two interleaving shufflevectors (even/odd lanes) stored into the
// sret-returned int8x8x2_t aggregate.
// CHECK: @test_vtrn_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !3
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !3
// CHECK: ret void
int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
  return vtrn_s8(a, b);
}
20288
// vtrn_s16: interleaving shufflevector pair on <4 x i16>, stored via sret.
// CHECK: @test_vtrn_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !6
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !6
// CHECK: ret void
int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
  return vtrn_s16(a, b);
}
20303
// vtrn_s32: interleaving shufflevector pair on <2 x i32>, stored via sret.
// CHECK: @test_vtrn_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !9
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !9
// CHECK: ret void
int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
  return vtrn_s32(a, b);
}
20318
// vtrn_u8: same lowering as the signed variant — two interleaving shuffles.
// CHECK: @test_vtrn_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !12
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !12
// CHECK: ret void
uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
  return vtrn_u8(a, b);
}
20331
// vtrn_u16: interleaving shufflevector pair on <4 x i16>, stored via sret.
// CHECK: @test_vtrn_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !15
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !15
// CHECK: ret void
uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
  return vtrn_u16(a, b);
}
20346
// vtrn_u32: interleaving shufflevector pair on <2 x i32>, stored via sret.
// CHECK: @test_vtrn_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !18
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !18
// CHECK: ret void
uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
  return vtrn_u32(a, b);
}
20361
// vtrn_f32: interleaving shufflevector pair on <2 x float>, stored via sret.
// CHECK: @test_vtrn_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !21
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !21
// CHECK: ret void
float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
  return vtrn_f32(a, b);
}
20376
// vtrn_p8: polynomial variant, identical interleaving shuffle lowering.
// CHECK: @test_vtrn_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !24
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !24
// CHECK: ret void
poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
  return vtrn_p8(a, b);
}
20389
// vtrn_p16: interleaving shufflevector pair on <4 x i16>, stored via sret.
// CHECK: @test_vtrn_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !27
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !27
// CHECK: ret void
poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
  return vtrn_p16(a, b);
}
20404
// vtrnq_s8: 128-bit (q) variant — interleaving shuffles on <16 x i8>, align 16 sret.
// CHECK: @test_vtrnq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !30
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !30
// CHECK: ret void
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
  return vtrnq_s8(a, b);
}
20417
// vtrnq_s16: interleaving shufflevector pair on <8 x i16>, stored via sret.
// CHECK: @test_vtrnq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !33
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !33
// CHECK: ret void
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
  return vtrnq_s16(a, b);
}
20432
// vtrnq_s32: interleaving shufflevector pair on <4 x i32>, stored via sret.
// CHECK: @test_vtrnq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !36
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !36
// CHECK: ret void
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
  return vtrnq_s32(a, b);
}
20447
// vtrnq_u8: interleaving shufflevector pair on <16 x i8>, stored via sret.
// CHECK: @test_vtrnq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !39
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !39
// CHECK: ret void
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
  return vtrnq_u8(a, b);
}
20460
// vtrnq_u16: interleaving shufflevector pair on <8 x i16>, stored via sret.
// CHECK: @test_vtrnq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !42
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !42
// CHECK: ret void
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
  return vtrnq_u16(a, b);
}
20475
// vtrnq_u32: interleaving shufflevector pair on <4 x i32>, stored via sret.
// CHECK: @test_vtrnq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !45
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !45
// CHECK: ret void
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
  return vtrnq_u32(a, b);
}
20490
// vtrnq_f32: interleaving shufflevector pair on <4 x float>, stored via sret.
// CHECK: @test_vtrnq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !48
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !48
// CHECK: ret void
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
  return vtrnq_f32(a, b);
}
20505
// vtrnq_p8: interleaving shufflevector pair on <16 x i8>, stored via sret.
// CHECK: @test_vtrnq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !51
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !51
// CHECK: ret void
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
  return vtrnq_p8(a, b);
}
20518
// vtrnq_p16: interleaving shufflevector pair on <8 x i16>, stored via sret.
// CHECK: @test_vtrnq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !54
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !54
// CHECK: ret void
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
  return vtrnq_p16(a, b);
}
20533
// vtst_s8: lowers to and + icmp ne 0 + sext — an all-ones/all-zeros lane mask.
// CHECK-LABEL: @test_vtst_s8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
  return vtst_s8(a, b);
}
20542
// vtst_s16: and + icmp ne 0 + sext mask on <4 x i16>.
// CHECK-LABEL: @test_vtst_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
  return vtst_s16(a, b);
}
20553
// vtst_s32: and + icmp ne 0 + sext mask on <2 x i32>.
// CHECK-LABEL: @test_vtst_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
  return vtst_s32(a, b);
}
20564
// vtst_u8: identical lowering to the signed variant (vtst is sign-agnostic).
// CHECK-LABEL: @test_vtst_u8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
  return vtst_u8(a, b);
}
20573
// vtst_u16: and + icmp ne 0 + sext mask on <4 x i16>.
// CHECK-LABEL: @test_vtst_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
  return vtst_u16(a, b);
}
20584
// vtst_u32: and + icmp ne 0 + sext mask on <2 x i32>.
// CHECK-LABEL: @test_vtst_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
  return vtst_u32(a, b);
}
20595
// vtst_p8: polynomial variant, same and/icmp/sext lowering.
// CHECK-LABEL: @test_vtst_p8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
  return vtst_p8(a, b);
}
20604
// vtst_p16: and + icmp ne 0 + sext mask on <4 x i16>.
// CHECK-LABEL: @test_vtst_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
  return vtst_p16(a, b);
}
20615
// vtstq_s8: 128-bit variant — and + icmp ne 0 + sext mask on <16 x i8>.
// CHECK-LABEL: @test_vtstq_s8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
  return vtstq_s8(a, b);
}
20624
// vtstq_s16: and + icmp ne 0 + sext mask on <8 x i16>.
// CHECK-LABEL: @test_vtstq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
  return vtstq_s16(a, b);
}
20635
// vtstq_s32: and + icmp ne 0 + sext mask on <4 x i32>.
// CHECK-LABEL: @test_vtstq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
  return vtstq_s32(a, b);
}
20646
// vtstq_u8: and + icmp ne 0 + sext mask on <16 x i8>.
// CHECK-LABEL: @test_vtstq_u8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
  return vtstq_u8(a, b);
}
20655
// vtstq_u16: and + icmp ne 0 + sext mask on <8 x i16>.
// CHECK-LABEL: @test_vtstq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
  return vtstq_u16(a, b);
}
20666
// vtstq_u32: and + icmp ne 0 + sext mask on <4 x i32>.
// CHECK-LABEL: @test_vtstq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
  return vtstq_u32(a, b);
}
20677
// vtstq_p8: and + icmp ne 0 + sext mask on <16 x i8>.
// CHECK-LABEL: @test_vtstq_p8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
  return vtstq_p8(a, b);
}
20686
// vtstq_p16: and + icmp ne 0 + sext mask on <8 x i16>.
// CHECK-LABEL: @test_vtstq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
  return vtstq_p16(a, b);
}
20697
// vuzp_s8: de-interleaving shuffles (even lanes then odd lanes) stored into
// the sret-returned int8x8x2_t aggregate.
// CHECK: @test_vuzp_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !57
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !57
// CHECK: ret void
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
  return vuzp_s8(a, b);
}
20710
// vuzp_s16: de-interleaving shufflevector pair on <4 x i16>, stored via sret.
// CHECK: @test_vuzp_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !60
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !60
// CHECK: ret void
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
  return vuzp_s16(a, b);
}
20725
20726 // CHECK: @test_vuzp_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20727 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20728 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20729 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20730 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20731 // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20732 // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !63
20733 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20734 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20735 // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !63
20736 // CHECK: ret void
// vuzp_s32: with 2-element vectors the "unzip" masks degenerate to
// <0,2> and <1,3>, as the CHECK lines above verify.
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
  return vuzp_s32(a, b);
}
20740
20741 // CHECK: @test_vuzp_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20742 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
20743 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20744 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20745 // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !66
20746 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20747 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20748 // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !66
20749 // CHECK: ret void
// vuzp_u8: unsigned variant; same even/odd shufflevector lowering as the
// signed test (CHECK lines above).
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp_u8(a, b);
}
20753
20754 // CHECK: @test_vuzp_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20755 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
20756 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20757 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20758 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20759 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20760 // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !69
20761 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20762 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20763 // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !69
20764 // CHECK: ret void
// vuzp_u16: unsigned variant; even/odd de-interleave via two
// shufflevectors (CHECK lines above).
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp_u16(a, b);
}
20768
20769 // CHECK: @test_vuzp_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20770 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
20771 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20772 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20773 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20774 // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20775 // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !72
20776 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20777 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20778 // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !72
20779 // CHECK: ret void
// vuzp_u32: 2-element unsigned variant; masks <0,2> / <1,3>
// (CHECK lines above).
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp_u32(a, b);
}
20783
20784 // CHECK: @test_vuzp_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20785 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
20786 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
20787 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
20788 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
20789 // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
20790 // CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !75
20791 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
20792 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
20793 // CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !75
20794 // CHECK: ret void
// vuzp_f32: float variant; same <0,2> / <1,3> shuffle lowering on
// <2 x float> operands (CHECK lines above).
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
  return vuzp_f32(a, b);
}
20798
20799 // CHECK: @test_vuzp_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20800 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
20801 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20802 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20803 // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !78
20804 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20805 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20806 // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !78
20807 // CHECK: ret void
// vuzp_p8: polynomial variant; identical even/odd shufflevector lowering
// to the s8/u8 tests (CHECK lines above).
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp_p8(a, b);
}
20811
20812 // CHECK: @test_vuzp_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20813 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
20814 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20815 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20816 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20817 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20818 // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !81
20819 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20820 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20821 // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !81
20822 // CHECK: ret void
// vuzp_p16: polynomial variant; even/odd de-interleave via two
// shufflevectors (CHECK lines above).
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp_p16(a, b);
}
20826
20827 // CHECK: @test_vuzpq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20828 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
20829 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20830 // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
20831 // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !84
20832 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20833 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
20834 // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !84
20835 // CHECK: ret void
// vuzpq_s8: 128-bit (q) variant; de-interleaves 16-byte vectors into
// even- and odd-indexed halves (CHECK lines above).
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}
20839
20840 // CHECK: @test_vuzpq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20841 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
20842 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20843 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20844 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20845 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20846 // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !87
20847 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20848 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20849 // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !87
20850 // CHECK: ret void
// vuzpq_s16: 128-bit variant; even/odd de-interleave of <8 x i16>
// operands (CHECK lines above).
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}
20854
20855 // CHECK: @test_vuzpq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20856 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
20857 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20858 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20859 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20860 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20861 // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !90
20862 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20863 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20864 // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !90
20865 // CHECK: ret void
// vuzpq_s32: 128-bit variant; even/odd de-interleave of <4 x i32>
// operands (CHECK lines above).
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}
20869
20870 // CHECK: @test_vuzpq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20871 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
20872 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20873 // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
20874 // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !93
20875 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20876 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
20877 // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !93
20878 // CHECK: ret void
// vuzpq_u8: 128-bit unsigned variant; same lowering as vuzpq_s8
// (CHECK lines above).
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}
20882
20883 // CHECK: @test_vuzpq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20884 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
20885 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20886 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20887 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20888 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20889 // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !96
20890 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20891 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20892 // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !96
20893 // CHECK: ret void
// vuzpq_u16: 128-bit unsigned variant; even/odd de-interleave
// (CHECK lines above).
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}
20897
20898 // CHECK: @test_vuzpq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20899 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
20900 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20901 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20902 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20903 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20904 // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !99
20905 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20906 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20907 // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !99
20908 // CHECK: ret void
// vuzpq_u32: 128-bit unsigned variant; even/odd de-interleave of
// <4 x i32> operands (CHECK lines above).
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}
20912
20913 // CHECK: @test_vuzpq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20914 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
20915 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
20916 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
20917 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
20918 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20919 // CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !102
20920 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
20921 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20922 // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !102
20923 // CHECK: ret void
// vuzpq_f32: 128-bit float variant; even/odd de-interleave of
// <4 x float> operands (CHECK lines above).
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
  return vuzpq_f32(a, b);
}
20927
20928 // CHECK: @test_vuzpq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20929 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
20930 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20931 // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
20932 // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !105
20933 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20934 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
20935 // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !105
20936 // CHECK: ret void
// vuzpq_p8: 128-bit polynomial variant; same even/odd shufflevector
// lowering as vuzpq_s8 (CHECK lines above).
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
  return vuzpq_p8(a, b);
}
20940
20941 // CHECK: @test_vuzpq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20942 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
20943 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20944 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20945 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20946 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20947 // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !108
20948 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20949 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20950 // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !108
20951 // CHECK: ret void
// vuzpq_p16: 128-bit polynomial variant; even/odd de-interleave
// (CHECK lines above).
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
  return vuzpq_p16(a, b);
}
20955
20956 // CHECK: @test_vzip_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20957 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
20958 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20959 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
20960 // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !111
20961 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20962 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
20963 // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !111
20964 // CHECK: ret void
// vzip_s8: interleave a and b element-wise; low half uses indices
// <0,8,1,9,...>, high half <4,12,5,13,...> (CHECK lines above).
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
  return vzip_s8(a, b);
}
20968
20969 // CHECK: @test_vzip_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20970 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
20971 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20972 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20973 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20974 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
20975 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !114
20976 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20977 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
20978 // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !114
20979 // CHECK: ret void
// vzip_s16: interleave a and b element-wise into two result halves
// via shufflevectors (CHECK lines above).
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
  return vzip_s16(a, b);
}
20983
20984 // CHECK: @test_vzip_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20985 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20986 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20987 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20988 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20989 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20990 // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !117
20991 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20992 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20993 // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !117
20994 // CHECK: ret void
// vzip_s32: with 2-element vectors the "zip" masks degenerate to
// <0,2> and <1,3>, as the CHECK lines above verify.
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
  return vzip_s32(a, b);
}
20998
20999 // CHECK: @test_vzip_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21000 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
21001 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
21002 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21003 // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !120
21004 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
21005 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21006 // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !120
21007 // CHECK: ret void
// vzip_u8: unsigned variant; same interleave shufflevector lowering as
// vzip_s8 (CHECK lines above).
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
  return vzip_u8(a, b);
}
21011
21012 // CHECK: @test_vzip_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21013 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
21014 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
21015 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
21016 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
21017 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21018 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !123
21019 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
21020 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21021 // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !123
21022 // CHECK: ret void
// vzip_u16: unsigned variant; element-wise interleave via two
// shufflevectors (CHECK lines above).
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
  return vzip_u16(a, b);
}
21026
21027 // CHECK: @test_vzip_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21028 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
21029 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
21030 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
21031 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
21032 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
21033 // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !126
21034 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
21035 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
21036 // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !126
21037 // CHECK: ret void
// vzip_u32: 2-element unsigned variant; masks <0,2> / <1,3>
// (CHECK lines above).
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
  return vzip_u32(a, b);
}
21041
21042 // CHECK: @test_vzip_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21043 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
21044 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
21045 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
21046 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
21047 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
21048 // CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !129
21049 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
21050 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
21051 // CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !129
21052 // CHECK: ret void
// vzip_f32: float variant; <0,2> / <1,3> shuffle lowering on
// <2 x float> operands (CHECK lines above).
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
  return vzip_f32(a, b);
}
21056
21057 // CHECK: @test_vzip_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21058 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
21059 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
21060 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21061 // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !132
21062 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
21063 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21064 // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !132
21065 // CHECK: ret void
// vzip_p8: polynomial variant; identical interleave lowering to the
// s8/u8 tests (CHECK lines above).
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
  return vzip_p8(a, b);
}
21069
21070 // CHECK: @test_vzip_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21071 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
21072 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
21073 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
21074 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
21075 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21076 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !135
21077 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
21078 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21079 // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !135
21080 // CHECK: ret void
// vzip_p16: polynomial variant; element-wise interleave via two
// shufflevectors (CHECK lines above).
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
  return vzip_p16(a, b);
}
21084
21085 // CHECK: @test_vzipq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21086 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
21087 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
21088 // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
21089 // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !138
21090 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
21091 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
21092 // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !138
21093 // CHECK: ret void
// vzipq_s8: 128-bit (q) variant; interleaves 16-byte vectors into low
// and high result halves (CHECK lines above).
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
  return vzipq_s8(a, b);
}
21097
21098 // CHECK: @test_vzipq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21099 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
21100 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21101 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
21102 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
21103 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21104 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !141
21105 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
21106 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21107 // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !141
21108 // CHECK: ret void
// vzipq_s16: 128-bit variant; element-wise interleave of <8 x i16>
// operands (CHECK lines above).
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
  return vzipq_s16(a, b);
}
21112
21113 // CHECK: @test_vzipq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21114 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
21115 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
21116 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
21117 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
21118 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21119 // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !144
21120 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
21121 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21122 // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !144
21123 // CHECK: ret void
// vzipq_s32: 128-bit variant; element-wise interleave of <4 x i32>
// operands (CHECK lines above).
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
  return vzipq_s32(a, b);
}
21127
21128 // CHECK: @test_vzipq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21129 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
21130 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
21131 // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
21132 // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !147
21133 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
21134 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
21135 // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !147
21136 // CHECK: ret void
// vzipq_u8: 128-bit unsigned variant; same lowering as vzipq_s8
// (CHECK lines above).
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
  return vzipq_u8(a, b);
}
21140
21141 // CHECK: @test_vzipq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21142 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
21143 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21144 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
21145 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
21146 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21147 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !150
21148 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
21149 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21150 // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !150
21151 // CHECK: ret void
// vzipq_u16: 128-bit unsigned variant; element-wise interleave
// (CHECK lines above).
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
  return vzipq_u16(a, b);
}
21155
21156 // CHECK: @test_vzipq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21157 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
21158 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
21159 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
21160 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
21161 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21162 // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !153
21163 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
21164 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21165 // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !153
21166 // CHECK: ret void
// vzipq_u32: 128-bit unsigned variant; element-wise interleave of
// <4 x i32> operands (CHECK lines above).
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
  return vzipq_u32(a, b);
}
21170
21171 // CHECK: @test_vzipq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21172 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
21173 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
21174 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
21175 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
21176 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21177 // CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !156
21178 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
21179 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21180 // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !156
21181 // CHECK: ret void
// vzipq_f32: identical shuffle masks to the u32 case, operating on
// <4 x float> lanes — the zip is purely a lane permutation, so element type
// only changes the vector type in the IR, not the mask.
test_vzipq_f32(float32x4_t a,float32x4_t b)21182 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
21183 return vzipq_f32(a, b);
21184 }
21185
21186 // CHECK: @test_vzipq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21187 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
// NOTE: no %a/%b bitcasts here — the arguments are already <16 x i8>, so only
// the sret pointer needs casting before the stores.
21188 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
21189 // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
21190 // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !159
21191 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
21192 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
21193 // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !159
21194 // CHECK: ret void
// vzipq_p8: byte-granularity zip — lanes 0..7 of a and b interleaved, then
// lanes 8..15, stored into the sret poly8x16x2_t.
test_vzipq_p8(poly8x16_t a,poly8x16_t b)21195 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
21196 return vzipq_p8(a, b);
21197 }
21198
21199 // CHECK: @test_vzipq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21200 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
21201 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21202 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
21203 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
21204 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21205 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !162
21206 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
21207 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21208 // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !162
21209 // CHECK: ret void
// vzipq_p16: same IR shape and shuffle masks as the u16 case (only the
// struct/metadata names differ) — polynomial vs unsigned element type does
// not change the zip permutation.
test_vzipq_p16(poly16x8_t a,poly16x8_t b)21210 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
21211 return vzipq_p16(a, b);
21212 }
21213