// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
// RUN: -target-cpu swift -fallow-half-arguments-and-returns \
// RUN: -target-feature +fullfp16 -ffreestanding \
// RUN: -flax-vector-conversions=none \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>
9
10 // CHECK-LABEL: @test_vaba_s8(
11 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
12 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
13 // CHECK: ret <8 x i8> [[ADD_I]]
test_vaba_s8(int8x8_t a,int8x8_t b,int8x8_t c)14 int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
15 return vaba_s8(a, b, c);
16 }
17
18 // CHECK-LABEL: @test_vaba_s16(
19 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
21 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
22 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
23 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
24 // CHECK: ret <4 x i16> [[ADD_I]]
test_vaba_s16(int16x4_t a,int16x4_t b,int16x4_t c)25 int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
26 return vaba_s16(a, b, c);
27 }
28
29 // CHECK-LABEL: @test_vaba_s32(
30 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
31 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
32 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
33 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
34 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
35 // CHECK: ret <2 x i32> [[ADD_I]]
test_vaba_s32(int32x2_t a,int32x2_t b,int32x2_t c)36 int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
37 return vaba_s32(a, b, c);
38 }
39
40 // CHECK-LABEL: @test_vaba_u8(
41 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
42 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
43 // CHECK: ret <8 x i8> [[ADD_I]]
test_vaba_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)44 uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
45 return vaba_u8(a, b, c);
46 }
47
48 // CHECK-LABEL: @test_vaba_u16(
49 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
50 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
51 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
52 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
53 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
54 // CHECK: ret <4 x i16> [[ADD_I]]
test_vaba_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)55 uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
56 return vaba_u16(a, b, c);
57 }
58
59 // CHECK-LABEL: @test_vaba_u32(
60 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
61 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
62 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
63 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
64 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
65 // CHECK: ret <2 x i32> [[ADD_I]]
test_vaba_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)66 uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
67 return vaba_u32(a, b, c);
68 }
69
70 // CHECK-LABEL: @test_vabaq_s8(
71 // CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c)
72 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
73 // CHECK: ret <16 x i8> [[ADD_I]]
test_vabaq_s8(int8x16_t a,int8x16_t b,int8x16_t c)74 int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
75 return vabaq_s8(a, b, c);
76 }
77
78 // CHECK-LABEL: @test_vabaq_s16(
79 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
80 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
81 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c)
82 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
83 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
84 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabaq_s16(int16x8_t a,int16x8_t b,int16x8_t c)85 int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
86 return vabaq_s16(a, b, c);
87 }
88
89 // CHECK-LABEL: @test_vabaq_s32(
90 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
91 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
92 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c)
93 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
94 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
95 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabaq_s32(int32x4_t a,int32x4_t b,int32x4_t c)96 int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
97 return vabaq_s32(a, b, c);
98 }
99
100 // CHECK-LABEL: @test_vabaq_u8(
101 // CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c)
102 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
103 // CHECK: ret <16 x i8> [[ADD_I]]
test_vabaq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)104 uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
105 return vabaq_u8(a, b, c);
106 }
107
108 // CHECK-LABEL: @test_vabaq_u16(
109 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
110 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
111 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c)
112 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
113 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
114 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabaq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)115 uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
116 return vabaq_u16(a, b, c);
117 }
118
119 // CHECK-LABEL: @test_vabaq_u32(
120 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
121 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
122 // CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c)
123 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
124 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
125 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabaq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)126 uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
127 return vabaq_u32(a, b, c);
128 }
129
130 // CHECK-LABEL: @test_vabal_s8(
131 // CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
132 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
133 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
134 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabal_s8(int16x8_t a,int8x8_t b,int8x8_t c)135 int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
136 return vabal_s8(a, b, c);
137 }
138
139 // CHECK-LABEL: @test_vabal_s16(
140 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
141 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
142 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
143 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
144 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
145 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
146 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
147 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabal_s16(int32x4_t a,int16x4_t b,int16x4_t c)148 int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
149 return vabal_s16(a, b, c);
150 }
151
152 // CHECK-LABEL: @test_vabal_s32(
153 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
154 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
155 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
156 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
157 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
158 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
159 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
160 // CHECK: ret <2 x i64> [[ADD_I]]
test_vabal_s32(int64x2_t a,int32x2_t b,int32x2_t c)161 int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
162 return vabal_s32(a, b, c);
163 }
164
165 // CHECK-LABEL: @test_vabal_u8(
166 // CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
167 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
168 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
169 // CHECK: ret <8 x i16> [[ADD_I]]
test_vabal_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)170 uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
171 return vabal_u8(a, b, c);
172 }
173
174 // CHECK-LABEL: @test_vabal_u16(
175 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
176 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
177 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
178 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
179 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
180 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
181 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
182 // CHECK: ret <4 x i32> [[ADD_I]]
test_vabal_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)183 uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
184 return vabal_u16(a, b, c);
185 }
186
187 // CHECK-LABEL: @test_vabal_u32(
188 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
189 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
190 // CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
191 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
192 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
193 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
194 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
195 // CHECK: ret <2 x i64> [[ADD_I]]
test_vabal_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)196 uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
197 return vabal_u32(a, b, c);
198 }
199
200 // CHECK-LABEL: @test_vabd_s8(
201 // CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
202 // CHECK: ret <8 x i8> [[VABD_V_I]]
test_vabd_s8(int8x8_t a,int8x8_t b)203 int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
204 return vabd_s8(a, b);
205 }
206
207 // CHECK-LABEL: @test_vabd_s16(
208 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
209 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
210 // CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
211 // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
212 // CHECK: ret <4 x i16> [[VABD_V2_I]]
test_vabd_s16(int16x4_t a,int16x4_t b)213 int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
214 return vabd_s16(a, b);
215 }
216
217 // CHECK-LABEL: @test_vabd_s32(
218 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
219 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
220 // CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
221 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
222 // CHECK: ret <2 x i32> [[VABD_V2_I]]
test_vabd_s32(int32x2_t a,int32x2_t b)223 int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
224 return vabd_s32(a, b);
225 }
226
227 // CHECK-LABEL: @test_vabd_u8(
228 // CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
229 // CHECK: ret <8 x i8> [[VABD_V_I]]
test_vabd_u8(uint8x8_t a,uint8x8_t b)230 uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
231 return vabd_u8(a, b);
232 }
233
234 // CHECK-LABEL: @test_vabd_u16(
235 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
236 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
237 // CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
238 // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
239 // CHECK: ret <4 x i16> [[VABD_V2_I]]
test_vabd_u16(uint16x4_t a,uint16x4_t b)240 uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
241 return vabd_u16(a, b);
242 }
243
244 // CHECK-LABEL: @test_vabd_u32(
245 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
246 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
247 // CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
248 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
249 // CHECK: ret <2 x i32> [[VABD_V2_I]]
test_vabd_u32(uint32x2_t a,uint32x2_t b)250 uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
251 return vabd_u32(a, b);
252 }
253
254 // CHECK-LABEL: @test_vabd_f32(
255 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
256 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
257 // CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b)
258 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
259 // CHECK: ret <2 x float> [[VABD_V2_I]]
test_vabd_f32(float32x2_t a,float32x2_t b)260 float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
261 return vabd_f32(a, b);
262 }
263
264 // CHECK-LABEL: @test_vabdq_s8(
265 // CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b)
266 // CHECK: ret <16 x i8> [[VABDQ_V_I]]
test_vabdq_s8(int8x16_t a,int8x16_t b)267 int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
268 return vabdq_s8(a, b);
269 }
270
271 // CHECK-LABEL: @test_vabdq_s16(
272 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
273 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
274 // CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b)
275 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
276 // CHECK: ret <8 x i16> [[VABDQ_V2_I]]
test_vabdq_s16(int16x8_t a,int16x8_t b)277 int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
278 return vabdq_s16(a, b);
279 }
280
281 // CHECK-LABEL: @test_vabdq_s32(
282 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
283 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
284 // CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b)
285 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
286 // CHECK: ret <4 x i32> [[VABDQ_V2_I]]
test_vabdq_s32(int32x4_t a,int32x4_t b)287 int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
288 return vabdq_s32(a, b);
289 }
290
291 // CHECK-LABEL: @test_vabdq_u8(
292 // CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b)
293 // CHECK: ret <16 x i8> [[VABDQ_V_I]]
test_vabdq_u8(uint8x16_t a,uint8x16_t b)294 uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
295 return vabdq_u8(a, b);
296 }
297
298 // CHECK-LABEL: @test_vabdq_u16(
299 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
300 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
301 // CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b)
302 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
303 // CHECK: ret <8 x i16> [[VABDQ_V2_I]]
test_vabdq_u16(uint16x8_t a,uint16x8_t b)304 uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
305 return vabdq_u16(a, b);
306 }
307
308 // CHECK-LABEL: @test_vabdq_u32(
309 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
310 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
311 // CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b)
312 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
313 // CHECK: ret <4 x i32> [[VABDQ_V2_I]]
test_vabdq_u32(uint32x4_t a,uint32x4_t b)314 uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
315 return vabdq_u32(a, b);
316 }
317
318 // CHECK-LABEL: @test_vabdq_f32(
319 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
320 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
321 // CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b)
322 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
323 // CHECK: ret <4 x float> [[VABDQ_V2_I]]
test_vabdq_f32(float32x4_t a,float32x4_t b)324 float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
325 return vabdq_f32(a, b);
326 }
327
328 // CHECK-LABEL: @test_vabdl_s8(
329 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
330 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
331 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
test_vabdl_s8(int8x8_t a,int8x8_t b)332 int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
333 return vabdl_s8(a, b);
334 }
335
336 // CHECK-LABEL: @test_vabdl_s16(
337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
339 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
340 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
341 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
342 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
343 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
test_vabdl_s16(int16x4_t a,int16x4_t b)344 int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
345 return vabdl_s16(a, b);
346 }
347
348 // CHECK-LABEL: @test_vabdl_s32(
349 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
350 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
351 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
352 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
353 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
354 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
355 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
test_vabdl_s32(int32x2_t a,int32x2_t b)356 int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
357 return vabdl_s32(a, b);
358 }
359
360 // CHECK-LABEL: @test_vabdl_u8(
361 // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
362 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
363 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
test_vabdl_u8(uint8x8_t a,uint8x8_t b)364 uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
365 return vabdl_u8(a, b);
366 }
367
368 // CHECK-LABEL: @test_vabdl_u16(
369 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
370 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
371 // CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
372 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
373 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
374 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
375 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
test_vabdl_u16(uint16x4_t a,uint16x4_t b)376 uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
377 return vabdl_u16(a, b);
378 }
379
380 // CHECK-LABEL: @test_vabdl_u32(
381 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
382 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
383 // CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
384 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
385 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
386 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
387 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
test_vabdl_u32(uint32x2_t a,uint32x2_t b)388 uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
389 return vabdl_u32(a, b);
390 }
391
392 // CHECK-LABEL: @test_vabs_s8(
393 // CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a)
394 // CHECK: ret <8 x i8> [[VABS_I]]
test_vabs_s8(int8x8_t a)395 int8x8_t test_vabs_s8(int8x8_t a) {
396 return vabs_s8(a);
397 }
398
399 // CHECK-LABEL: @test_vabs_s16(
400 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
401 // CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a)
402 // CHECK: ret <4 x i16> [[VABS1_I]]
test_vabs_s16(int16x4_t a)403 int16x4_t test_vabs_s16(int16x4_t a) {
404 return vabs_s16(a);
405 }
406
407 // CHECK-LABEL: @test_vabs_s32(
408 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
409 // CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a)
410 // CHECK: ret <2 x i32> [[VABS1_I]]
test_vabs_s32(int32x2_t a)411 int32x2_t test_vabs_s32(int32x2_t a) {
412 return vabs_s32(a);
413 }
414
415 // CHECK-LABEL: @test_vabs_f32(
416 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
417 // CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
418 // CHECK: ret <2 x float> [[VABS1_I]]
test_vabs_f32(float32x2_t a)419 float32x2_t test_vabs_f32(float32x2_t a) {
420 return vabs_f32(a);
421 }
422
423 // CHECK-LABEL: @test_vabsq_s8(
424 // CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a)
425 // CHECK: ret <16 x i8> [[VABS_I]]
test_vabsq_s8(int8x16_t a)426 int8x16_t test_vabsq_s8(int8x16_t a) {
427 return vabsq_s8(a);
428 }
429
430 // CHECK-LABEL: @test_vabsq_s16(
431 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
432 // CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a)
433 // CHECK: ret <8 x i16> [[VABS1_I]]
test_vabsq_s16(int16x8_t a)434 int16x8_t test_vabsq_s16(int16x8_t a) {
435 return vabsq_s16(a);
436 }
437
438 // CHECK-LABEL: @test_vabsq_s32(
439 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
440 // CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a)
441 // CHECK: ret <4 x i32> [[VABS1_I]]
test_vabsq_s32(int32x4_t a)442 int32x4_t test_vabsq_s32(int32x4_t a) {
443 return vabsq_s32(a);
444 }
445
446 // CHECK-LABEL: @test_vabsq_f32(
447 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
448 // CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
449 // CHECK: ret <4 x float> [[VABS1_I]]
test_vabsq_f32(float32x4_t a)450 float32x4_t test_vabsq_f32(float32x4_t a) {
451 return vabsq_f32(a);
452 }
453
454 // CHECK-LABEL: @test_vadd_s8(
455 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
456 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_s8(int8x8_t a,int8x8_t b)457 int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
458 return vadd_s8(a, b);
459 }
460
461 // CHECK-LABEL: @test_vadd_s16(
462 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
463 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_s16(int16x4_t a,int16x4_t b)464 int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
465 return vadd_s16(a, b);
466 }
467
468 // CHECK-LABEL: @test_vadd_s32(
469 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
470 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_s32(int32x2_t a,int32x2_t b)471 int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
472 return vadd_s32(a, b);
473 }
474
475 // CHECK-LABEL: @test_vadd_s64(
476 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
477 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_s64(int64x1_t a,int64x1_t b)478 int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
479 return vadd_s64(a, b);
480 }
481
482 // CHECK-LABEL: @test_vadd_f32(
483 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b
484 // CHECK: ret <2 x float> [[ADD_I]]
test_vadd_f32(float32x2_t a,float32x2_t b)485 float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
486 return vadd_f32(a, b);
487 }
488
489 // CHECK-LABEL: @test_vadd_u8(
490 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
491 // CHECK: ret <8 x i8> [[ADD_I]]
test_vadd_u8(uint8x8_t a,uint8x8_t b)492 uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
493 return vadd_u8(a, b);
494 }
495
496 // CHECK-LABEL: @test_vadd_u16(
497 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
498 // CHECK: ret <4 x i16> [[ADD_I]]
test_vadd_u16(uint16x4_t a,uint16x4_t b)499 uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
500 return vadd_u16(a, b);
501 }
502
503 // CHECK-LABEL: @test_vadd_u32(
504 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
505 // CHECK: ret <2 x i32> [[ADD_I]]
test_vadd_u32(uint32x2_t a,uint32x2_t b)506 uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
507 return vadd_u32(a, b);
508 }
509
510 // CHECK-LABEL: @test_vadd_u64(
511 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
512 // CHECK: ret <1 x i64> [[ADD_I]]
test_vadd_u64(uint64x1_t a,uint64x1_t b)513 uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
514 return vadd_u64(a, b);
515 }
516
517 // CHECK-LABEL: @test_vaddq_s8(
518 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
519 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_s8(int8x16_t a,int8x16_t b)520 int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
521 return vaddq_s8(a, b);
522 }
523
524 // CHECK-LABEL: @test_vaddq_s16(
525 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
526 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_s16(int16x8_t a,int16x8_t b)527 int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
528 return vaddq_s16(a, b);
529 }
530
531 // CHECK-LABEL: @test_vaddq_s32(
532 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
533 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_s32(int32x4_t a,int32x4_t b)534 int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
535 return vaddq_s32(a, b);
536 }
537
538 // CHECK-LABEL: @test_vaddq_s64(
539 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
540 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_s64(int64x2_t a,int64x2_t b)541 int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
542 return vaddq_s64(a, b);
543 }
544
545 // CHECK-LABEL: @test_vaddq_f32(
546 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b
547 // CHECK: ret <4 x float> [[ADD_I]]
test_vaddq_f32(float32x4_t a,float32x4_t b)548 float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
549 return vaddq_f32(a, b);
550 }
551
552 // CHECK-LABEL: @test_vaddq_u8(
553 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
554 // CHECK: ret <16 x i8> [[ADD_I]]
test_vaddq_u8(uint8x16_t a,uint8x16_t b)555 uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
556 return vaddq_u8(a, b);
557 }
558
559 // CHECK-LABEL: @test_vaddq_u16(
560 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
561 // CHECK: ret <8 x i16> [[ADD_I]]
test_vaddq_u16(uint16x8_t a,uint16x8_t b)562 uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
563 return vaddq_u16(a, b);
564 }
565
566 // CHECK-LABEL: @test_vaddq_u32(
567 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
568 // CHECK: ret <4 x i32> [[ADD_I]]
test_vaddq_u32(uint32x4_t a,uint32x4_t b)569 uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
570 return vaddq_u32(a, b);
571 }
572
573 // CHECK-LABEL: @test_vaddq_u64(
574 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
575 // CHECK: ret <2 x i64> [[ADD_I]]
test_vaddq_u64(uint64x2_t a,uint64x2_t b)576 uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
577 return vaddq_u64(a, b);
578 }
579
580 // CHECK-LABEL: @test_vaddhn_s16(
581 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
582 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
583 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
584 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
585 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
586 // CHECK: ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_s16(int16x8_t a,int16x8_t b)587 int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
588 return vaddhn_s16(a, b);
589 }
590
591 // CHECK-LABEL: @test_vaddhn_s32(
592 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
593 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
594 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
595 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
596 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
597 // CHECK: ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_s32(int32x4_t a,int32x4_t b)598 int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
599 return vaddhn_s32(a, b);
600 }
601
602 // CHECK-LABEL: @test_vaddhn_s64(
603 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
604 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
605 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
606 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
607 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
608 // CHECK: ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_s64(int64x2_t a,int64x2_t b)609 int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
610 return vaddhn_s64(a, b);
611 }
612
613 // CHECK-LABEL: @test_vaddhn_u16(
614 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
615 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
616 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
617 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
618 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
619 // CHECK: ret <8 x i8> [[VADDHN2_I]]
test_vaddhn_u16(uint16x8_t a,uint16x8_t b)620 uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
621 return vaddhn_u16(a, b);
622 }
623
624 // CHECK-LABEL: @test_vaddhn_u32(
625 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
626 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
627 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
628 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
629 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
630 // CHECK: ret <4 x i16> [[VADDHN2_I]]
test_vaddhn_u32(uint32x4_t a,uint32x4_t b)631 uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
632 return vaddhn_u32(a, b);
633 }
634
635 // CHECK-LABEL: @test_vaddhn_u64(
636 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
637 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
638 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
639 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
640 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
641 // CHECK: ret <2 x i32> [[VADDHN2_I]]
test_vaddhn_u64(uint64x2_t a,uint64x2_t b)642 uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
643 return vaddhn_u64(a, b);
644 }
645
// vaddl tests: long add — both narrow operands are widened (sext for signed,
// zext for unsigned) to double-width vectors, then added with a plain IR add.
// CHECK-LABEL: @test_vaddl_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}
707
// vaddw tests: wide add — only the second (narrow) operand is widened
// (sext/zext per signedness), then added to the already-wide first operand.
// CHECK-LABEL: @test_vaddw_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}
759
// vand tests (64-bit D registers): bitwise AND lowers to a single IR `and`
// on the element vector type, identical for signed and unsigned variants.
// CHECK-LABEL: @test_vand_s8(
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
  return vand_s8(a, b);
}

// CHECK-LABEL: @test_vand_s16(
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
  return vand_s16(a, b);
}

// CHECK-LABEL: @test_vand_s32(
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
  return vand_s32(a, b);
}

// CHECK-LABEL: @test_vand_s64(
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
  return vand_s64(a, b);
}

// CHECK-LABEL: @test_vand_u8(
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
  return vand_u8(a, b);
}

// CHECK-LABEL: @test_vand_u16(
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
  return vand_u16(a, b);
}

// CHECK-LABEL: @test_vand_u32(
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
  return vand_u32(a, b);
}

// CHECK-LABEL: @test_vand_u64(
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
  return vand_u64(a, b);
}
815
// vandq tests (128-bit Q registers): same single IR `and` lowering as vand,
// on the double-length vector types.
// CHECK-LABEL: @test_vandq_s8(
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
  return vandq_s8(a, b);
}

// CHECK-LABEL: @test_vandq_s16(
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
  return vandq_s16(a, b);
}

// CHECK-LABEL: @test_vandq_s32(
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
  return vandq_s32(a, b);
}

// CHECK-LABEL: @test_vandq_s64(
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
  return vandq_s64(a, b);
}

// CHECK-LABEL: @test_vandq_u8(
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
  return vandq_u8(a, b);
}

// CHECK-LABEL: @test_vandq_u16(
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
  return vandq_u16(a, b);
}

// CHECK-LABEL: @test_vandq_u32(
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
  return vandq_u32(a, b);
}

// CHECK-LABEL: @test_vandq_u64(
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
  return vandq_u64(a, b);
}
871
// vbic tests (D registers): bit-clear (a & ~b) lowers to an xor with an
// all-ones splat followed by an `and` — no intrinsic call.
// CHECK-LABEL: @test_vbic_s8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
  return vbic_s8(a, b);
}

// CHECK-LABEL: @test_vbic_s16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
  return vbic_s16(a, b);
}

// CHECK-LABEL: @test_vbic_s32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
  return vbic_s32(a, b);
}

// CHECK-LABEL: @test_vbic_s64(
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
  return vbic_s64(a, b);
}

// CHECK-LABEL: @test_vbic_u8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
  return vbic_u8(a, b);
}

// CHECK-LABEL: @test_vbic_u16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
  return vbic_u16(a, b);
}

// CHECK-LABEL: @test_vbic_u32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
  return vbic_u32(a, b);
}

// CHECK-LABEL: @test_vbic_u64(
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
  return vbic_u64(a, b);
}
935
// vbicq tests (Q registers): same xor-with-all-ones + and lowering as vbic,
// on 128-bit vector types.
// CHECK-LABEL: @test_vbicq_s8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
  return vbicq_s8(a, b);
}

// CHECK-LABEL: @test_vbicq_s16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
  return vbicq_s16(a, b);
}

// CHECK-LABEL: @test_vbicq_s32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
  return vbicq_s32(a, b);
}

// CHECK-LABEL: @test_vbicq_s64(
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
  return vbicq_s64(a, b);
}

// CHECK-LABEL: @test_vbicq_u8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
  return vbicq_u8(a, b);
}

// CHECK-LABEL: @test_vbicq_u16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
  return vbicq_u16(a, b);
}

// CHECK-LABEL: @test_vbicq_u32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
  return vbicq_u32(a, b);
}

// CHECK-LABEL: @test_vbicq_u64(
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
  return vbicq_u64(a, b);
}
999
// vbsl tests (D registers): bitwise select lowers to a call of the
// llvm.arm.neon.vbsl.v8i8 intrinsic; element types other than i8 are
// bitcast to <8 x i8> around the call and bitcast back afterwards.
// CHECK-LABEL: @test_vbsl_s8(
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VBSL_V_I]]
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
  return vbsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
  return vbsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
  return vbsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
  return vbsl_s64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VBSL_V_I]]
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vbsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vbsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP3]]
uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vbsl_u32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
  return vbsl_u64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
  return vbsl_f32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VBSL_V_I]]
poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
  return vbsl_p8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
  return vbsl_p16(a, b, c);
}
1108
// vbslq tests (Q registers): bitwise select via llvm.arm.neon.vbsl.v16i8,
// with the same bitcast-to-bytes pattern for non-i8 element types.
// CHECK-LABEL: @test_vbslq_s8(
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
  return vbslq_s8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
  return vbslq_s16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
  return vbslq_s32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
  return vbslq_s64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vbslq_u8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vbslq_u16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP3]]
uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vbslq_u32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP3]]
uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  return vbslq_u64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
  return vbslq_f32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
  return vbslq_p8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
  return vbslq_p16(a, b, c);
}
1217
// Absolute-compare tests: vcage/vcagt lower to the llvm.arm.neon.vacge/vacgt
// intrinsics; the "less" variants (vcale/vcalt) reuse the same intrinsics
// with the operands swapped (note %b, %a order in those CHECK lines).
// CHECK-LABEL: @test_vcage_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
  return vcage_f32(a, b);
}

// CHECK-LABEL: @test_vcageq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
  return vcageq_f32(a, b);
}

// CHECK-LABEL: @test_vcagt_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
  return vcagt_f32(a, b);
}

// CHECK-LABEL: @test_vcagtq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
  return vcagtq_f32(a, b);
}

// CHECK-LABEL: @test_vcale_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
  return vcale_f32(a, b);
}

// CHECK-LABEL: @test_vcaleq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
  return vcaleq_f32(a, b);
}

// CHECK-LABEL: @test_vcalt_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
  return vcalt_f32(a, b);
}

// CHECK-LABEL: @test_vcaltq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
  return vcaltq_f32(a, b);
}
1289
// vceq tests: equality compare lowers to an IR icmp eq (fcmp oeq for float)
// producing an i1 mask, which is then sign-extended to the full element width.
// CHECK-LABEL: @test_vceq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
  return vceq_s8(a, b);
}

// CHECK-LABEL: @test_vceq_s16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
  return vceq_s16(a, b);
}

// CHECK-LABEL: @test_vceq_s32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
  return vceq_s32(a, b);
}

// CHECK-LABEL: @test_vceq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
  return vceq_f32(a, b);
}

// CHECK-LABEL: @test_vceq_u8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
  return vceq_u8(a, b);
}

// CHECK-LABEL: @test_vceq_u16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
  return vceq_u16(a, b);
}

// CHECK-LABEL: @test_vceq_u32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
  return vceq_u32(a, b);
}

// CHECK-LABEL: @test_vceq_p8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
  return vceq_p8(a, b);
}

// CHECK-LABEL: @test_vceqq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
  return vceqq_s8(a, b);
}
1361
1362 // CHECK-LABEL: @test_vceqq_s16(
1363 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
1364 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1365 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vceqq_s16(int16x8_t a,int16x8_t b)1366 uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
1367 return vceqq_s16(a, b);
1368 }
1369
1370 // CHECK-LABEL: @test_vceqq_s32(
1371 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
1372 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1373 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vceqq_s32(int32x4_t a,int32x4_t b)1374 uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
1375 return vceqq_s32(a, b);
1376 }
1377
1378 // CHECK-LABEL: @test_vceqq_f32(
1379 // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
1380 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1381 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vceqq_f32(float32x4_t a,float32x4_t b)1382 uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
1383 return vceqq_f32(a, b);
1384 }
1385
1386 // CHECK-LABEL: @test_vceqq_u8(
1387 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
1388 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1389 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vceqq_u8(uint8x16_t a,uint8x16_t b)1390 uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
1391 return vceqq_u8(a, b);
1392 }
1393
1394 // CHECK-LABEL: @test_vceqq_u16(
1395 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
1396 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1397 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vceqq_u16(uint16x8_t a,uint16x8_t b)1398 uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
1399 return vceqq_u16(a, b);
1400 }
1401
1402 // CHECK-LABEL: @test_vceqq_u32(
1403 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
1404 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1405 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vceqq_u32(uint32x4_t a,uint32x4_t b)1406 uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
1407 return vceqq_u32(a, b);
1408 }
1409
1410 // CHECK-LABEL: @test_vceqq_p8(
1411 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
1412 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1413 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vceqq_p8(poly8x16_t a,poly8x16_t b)1414 uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
1415 return vceqq_p8(a, b);
1416 }
1417
1418 // CHECK-LABEL: @test_vcge_s8(
1419 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
1420 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1421 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcge_s8(int8x8_t a,int8x8_t b)1422 uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
1423 return vcge_s8(a, b);
1424 }
1425
1426 // CHECK-LABEL: @test_vcge_s16(
1427 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
1428 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1429 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcge_s16(int16x4_t a,int16x4_t b)1430 uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
1431 return vcge_s16(a, b);
1432 }
1433
1434 // CHECK-LABEL: @test_vcge_s32(
1435 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
1436 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1437 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcge_s32(int32x2_t a,int32x2_t b)1438 uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
1439 return vcge_s32(a, b);
1440 }
1441
1442 // CHECK-LABEL: @test_vcge_f32(
1443 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
1444 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1445 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcge_f32(float32x2_t a,float32x2_t b)1446 uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
1447 return vcge_f32(a, b);
1448 }
1449
1450 // CHECK-LABEL: @test_vcge_u8(
1451 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
1452 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1453 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcge_u8(uint8x8_t a,uint8x8_t b)1454 uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
1455 return vcge_u8(a, b);
1456 }
1457
1458 // CHECK-LABEL: @test_vcge_u16(
1459 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
1460 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1461 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcge_u16(uint16x4_t a,uint16x4_t b)1462 uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
1463 return vcge_u16(a, b);
1464 }
1465
1466 // CHECK-LABEL: @test_vcge_u32(
1467 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
1468 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1469 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcge_u32(uint32x2_t a,uint32x2_t b)1470 uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
1471 return vcge_u32(a, b);
1472 }
1473
1474 // CHECK-LABEL: @test_vcgeq_s8(
1475 // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
1476 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1477 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcgeq_s8(int8x16_t a,int8x16_t b)1478 uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
1479 return vcgeq_s8(a, b);
1480 }
1481
1482 // CHECK-LABEL: @test_vcgeq_s16(
1483 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
1484 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1485 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcgeq_s16(int16x8_t a,int16x8_t b)1486 uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
1487 return vcgeq_s16(a, b);
1488 }
1489
1490 // CHECK-LABEL: @test_vcgeq_s32(
1491 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
1492 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1493 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgeq_s32(int32x4_t a,int32x4_t b)1494 uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
1495 return vcgeq_s32(a, b);
1496 }
1497
1498 // CHECK-LABEL: @test_vcgeq_f32(
1499 // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
1500 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1501 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgeq_f32(float32x4_t a,float32x4_t b)1502 uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
1503 return vcgeq_f32(a, b);
1504 }
1505
1506 // CHECK-LABEL: @test_vcgeq_u8(
1507 // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
1508 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1509 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcgeq_u8(uint8x16_t a,uint8x16_t b)1510 uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
1511 return vcgeq_u8(a, b);
1512 }
1513
1514 // CHECK-LABEL: @test_vcgeq_u16(
1515 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
1516 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1517 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcgeq_u16(uint16x8_t a,uint16x8_t b)1518 uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
1519 return vcgeq_u16(a, b);
1520 }
1521
1522 // CHECK-LABEL: @test_vcgeq_u32(
1523 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
1524 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1525 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgeq_u32(uint32x4_t a,uint32x4_t b)1526 uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
1527 return vcgeq_u32(a, b);
1528 }
1529
1530 // CHECK-LABEL: @test_vcgt_s8(
1531 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
1532 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1533 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcgt_s8(int8x8_t a,int8x8_t b)1534 uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
1535 return vcgt_s8(a, b);
1536 }
1537
1538 // CHECK-LABEL: @test_vcgt_s16(
1539 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
1540 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1541 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcgt_s16(int16x4_t a,int16x4_t b)1542 uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
1543 return vcgt_s16(a, b);
1544 }
1545
1546 // CHECK-LABEL: @test_vcgt_s32(
1547 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
1548 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1549 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcgt_s32(int32x2_t a,int32x2_t b)1550 uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
1551 return vcgt_s32(a, b);
1552 }
1553
1554 // CHECK-LABEL: @test_vcgt_f32(
1555 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
1556 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1557 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcgt_f32(float32x2_t a,float32x2_t b)1558 uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
1559 return vcgt_f32(a, b);
1560 }
1561
1562 // CHECK-LABEL: @test_vcgt_u8(
1563 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
1564 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1565 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcgt_u8(uint8x8_t a,uint8x8_t b)1566 uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
1567 return vcgt_u8(a, b);
1568 }
1569
1570 // CHECK-LABEL: @test_vcgt_u16(
1571 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
1572 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1573 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcgt_u16(uint16x4_t a,uint16x4_t b)1574 uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
1575 return vcgt_u16(a, b);
1576 }
1577
1578 // CHECK-LABEL: @test_vcgt_u32(
1579 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
1580 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1581 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcgt_u32(uint32x2_t a,uint32x2_t b)1582 uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
1583 return vcgt_u32(a, b);
1584 }
1585
1586 // CHECK-LABEL: @test_vcgtq_s8(
1587 // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
1588 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1589 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcgtq_s8(int8x16_t a,int8x16_t b)1590 uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
1591 return vcgtq_s8(a, b);
1592 }
1593
1594 // CHECK-LABEL: @test_vcgtq_s16(
1595 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
1596 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1597 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcgtq_s16(int16x8_t a,int16x8_t b)1598 uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
1599 return vcgtq_s16(a, b);
1600 }
1601
1602 // CHECK-LABEL: @test_vcgtq_s32(
1603 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
1604 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1605 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgtq_s32(int32x4_t a,int32x4_t b)1606 uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
1607 return vcgtq_s32(a, b);
1608 }
1609
1610 // CHECK-LABEL: @test_vcgtq_f32(
1611 // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
1612 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1613 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgtq_f32(float32x4_t a,float32x4_t b)1614 uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
1615 return vcgtq_f32(a, b);
1616 }
1617
1618 // CHECK-LABEL: @test_vcgtq_u8(
1619 // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
1620 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1621 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcgtq_u8(uint8x16_t a,uint8x16_t b)1622 uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
1623 return vcgtq_u8(a, b);
1624 }
1625
1626 // CHECK-LABEL: @test_vcgtq_u16(
1627 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
1628 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1629 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcgtq_u16(uint16x8_t a,uint16x8_t b)1630 uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
1631 return vcgtq_u16(a, b);
1632 }
1633
1634 // CHECK-LABEL: @test_vcgtq_u32(
1635 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
1636 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1637 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcgtq_u32(uint32x4_t a,uint32x4_t b)1638 uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
1639 return vcgtq_u32(a, b);
1640 }
1641
1642 // CHECK-LABEL: @test_vcle_s8(
1643 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
1644 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1645 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcle_s8(int8x8_t a,int8x8_t b)1646 uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
1647 return vcle_s8(a, b);
1648 }
1649
1650 // CHECK-LABEL: @test_vcle_s16(
1651 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
1652 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1653 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcle_s16(int16x4_t a,int16x4_t b)1654 uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
1655 return vcle_s16(a, b);
1656 }
1657
1658 // CHECK-LABEL: @test_vcle_s32(
1659 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
1660 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1661 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcle_s32(int32x2_t a,int32x2_t b)1662 uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
1663 return vcle_s32(a, b);
1664 }
1665
1666 // CHECK-LABEL: @test_vcle_f32(
1667 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
1668 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1669 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcle_f32(float32x2_t a,float32x2_t b)1670 uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
1671 return vcle_f32(a, b);
1672 }
1673
1674 // CHECK-LABEL: @test_vcle_u8(
1675 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
1676 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1677 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vcle_u8(uint8x8_t a,uint8x8_t b)1678 uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
1679 return vcle_u8(a, b);
1680 }
1681
1682 // CHECK-LABEL: @test_vcle_u16(
1683 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
1684 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1685 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vcle_u16(uint16x4_t a,uint16x4_t b)1686 uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
1687 return vcle_u16(a, b);
1688 }
1689
1690 // CHECK-LABEL: @test_vcle_u32(
1691 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
1692 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1693 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vcle_u32(uint32x2_t a,uint32x2_t b)1694 uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
1695 return vcle_u32(a, b);
1696 }
1697
1698 // CHECK-LABEL: @test_vcleq_s8(
1699 // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
1700 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1701 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcleq_s8(int8x16_t a,int8x16_t b)1702 uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
1703 return vcleq_s8(a, b);
1704 }
1705
1706 // CHECK-LABEL: @test_vcleq_s16(
1707 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
1708 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1709 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcleq_s16(int16x8_t a,int16x8_t b)1710 uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
1711 return vcleq_s16(a, b);
1712 }
1713
1714 // CHECK-LABEL: @test_vcleq_s32(
1715 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
1716 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1717 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcleq_s32(int32x4_t a,int32x4_t b)1718 uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
1719 return vcleq_s32(a, b);
1720 }
1721
1722 // CHECK-LABEL: @test_vcleq_f32(
1723 // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
1724 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1725 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcleq_f32(float32x4_t a,float32x4_t b)1726 uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
1727 return vcleq_f32(a, b);
1728 }
1729
1730 // CHECK-LABEL: @test_vcleq_u8(
1731 // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
1732 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1733 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcleq_u8(uint8x16_t a,uint8x16_t b)1734 uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
1735 return vcleq_u8(a, b);
1736 }
1737
1738 // CHECK-LABEL: @test_vcleq_u16(
1739 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
1740 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1741 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcleq_u16(uint16x8_t a,uint16x8_t b)1742 uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
1743 return vcleq_u16(a, b);
1744 }
1745
1746 // CHECK-LABEL: @test_vcleq_u32(
1747 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
1748 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1749 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcleq_u32(uint32x4_t a,uint32x4_t b)1750 uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
1751 return vcleq_u32(a, b);
1752 }
1753
1754 // CHECK-LABEL: @test_vcls_s8(
1755 // CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
1756 // CHECK: ret <8 x i8> [[VCLS_V_I]]
test_vcls_s8(int8x8_t a)1757 int8x8_t test_vcls_s8(int8x8_t a) {
1758 return vcls_s8(a);
1759 }
1760
1761 // CHECK-LABEL: @test_vcls_s16(
1762 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1763 // CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
1764 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
1765 // CHECK: ret <4 x i16> [[VCLS_V1_I]]
test_vcls_s16(int16x4_t a)1766 int16x4_t test_vcls_s16(int16x4_t a) {
1767 return vcls_s16(a);
1768 }
1769
1770 // CHECK-LABEL: @test_vcls_s32(
1771 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1772 // CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
1773 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
1774 // CHECK: ret <2 x i32> [[VCLS_V1_I]]
test_vcls_s32(int32x2_t a)1775 int32x2_t test_vcls_s32(int32x2_t a) {
1776 return vcls_s32(a);
1777 }
1778
1779 // CHECK-LABEL: @test_vclsq_s8(
1780 // CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
1781 // CHECK: ret <16 x i8> [[VCLSQ_V_I]]
test_vclsq_s8(int8x16_t a)1782 int8x16_t test_vclsq_s8(int8x16_t a) {
1783 return vclsq_s8(a);
1784 }
1785
1786 // CHECK-LABEL: @test_vclsq_s16(
1787 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1788 // CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
1789 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
1790 // CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
test_vclsq_s16(int16x8_t a)1791 int16x8_t test_vclsq_s16(int16x8_t a) {
1792 return vclsq_s16(a);
1793 }
1794
1795 // CHECK-LABEL: @test_vclsq_s32(
1796 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1797 // CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
1798 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
1799 // CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
test_vclsq_s32(int32x4_t a)1800 int32x4_t test_vclsq_s32(int32x4_t a) {
1801 return vclsq_s32(a);
1802 }
1803
1804 // CHECK-LABEL: @test_vclt_s8(
1805 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
1806 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1807 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vclt_s8(int8x8_t a,int8x8_t b)1808 uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
1809 return vclt_s8(a, b);
1810 }
1811
1812 // CHECK-LABEL: @test_vclt_s16(
1813 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
1814 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1815 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vclt_s16(int16x4_t a,int16x4_t b)1816 uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
1817 return vclt_s16(a, b);
1818 }
1819
1820 // CHECK-LABEL: @test_vclt_s32(
1821 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
1822 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1823 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vclt_s32(int32x2_t a,int32x2_t b)1824 uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
1825 return vclt_s32(a, b);
1826 }
1827
1828 // CHECK-LABEL: @test_vclt_f32(
1829 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
1830 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1831 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vclt_f32(float32x2_t a,float32x2_t b)1832 uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
1833 return vclt_f32(a, b);
1834 }
1835
1836 // CHECK-LABEL: @test_vclt_u8(
1837 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
1838 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1839 // CHECK: ret <8 x i8> [[SEXT_I]]
test_vclt_u8(uint8x8_t a,uint8x8_t b)1840 uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
1841 return vclt_u8(a, b);
1842 }
1843
1844 // CHECK-LABEL: @test_vclt_u16(
1845 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
1846 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1847 // CHECK: ret <4 x i16> [[SEXT_I]]
test_vclt_u16(uint16x4_t a,uint16x4_t b)1848 uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
1849 return vclt_u16(a, b);
1850 }
1851
1852 // CHECK-LABEL: @test_vclt_u32(
1853 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
1854 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1855 // CHECK: ret <2 x i32> [[SEXT_I]]
test_vclt_u32(uint32x2_t a,uint32x2_t b)1856 uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
1857 return vclt_u32(a, b);
1858 }
1859
1860 // CHECK-LABEL: @test_vcltq_s8(
1861 // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
1862 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1863 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcltq_s8(int8x16_t a,int8x16_t b)1864 uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
1865 return vcltq_s8(a, b);
1866 }
1867
1868 // CHECK-LABEL: @test_vcltq_s16(
1869 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
1870 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1871 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcltq_s16(int16x8_t a,int16x8_t b)1872 uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
1873 return vcltq_s16(a, b);
1874 }
1875
1876 // CHECK-LABEL: @test_vcltq_s32(
1877 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
1878 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1879 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcltq_s32(int32x4_t a,int32x4_t b)1880 uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
1881 return vcltq_s32(a, b);
1882 }
1883
1884 // CHECK-LABEL: @test_vcltq_f32(
1885 // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
1886 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1887 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcltq_f32(float32x4_t a,float32x4_t b)1888 uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
1889 return vcltq_f32(a, b);
1890 }
1891
1892 // CHECK-LABEL: @test_vcltq_u8(
1893 // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
1894 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1895 // CHECK: ret <16 x i8> [[SEXT_I]]
test_vcltq_u8(uint8x16_t a,uint8x16_t b)1896 uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
1897 return vcltq_u8(a, b);
1898 }
1899
1900 // CHECK-LABEL: @test_vcltq_u16(
1901 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
1902 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1903 // CHECK: ret <8 x i16> [[SEXT_I]]
test_vcltq_u16(uint16x8_t a,uint16x8_t b)1904 uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
1905 return vcltq_u16(a, b);
1906 }
1907
1908 // CHECK-LABEL: @test_vcltq_u32(
1909 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
1910 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1911 // CHECK: ret <4 x i32> [[SEXT_I]]
test_vcltq_u32(uint32x4_t a,uint32x4_t b)1912 uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
1913 return vcltq_u32(a, b);
1914 }
1915
1916 // CHECK-LABEL: @test_vclz_s8(
1917 // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
1918 // CHECK: ret <8 x i8> [[VCLZ_V_I]]
test_vclz_s8(int8x8_t a)1919 int8x8_t test_vclz_s8(int8x8_t a) {
1920 return vclz_s8(a);
1921 }
1922
1923 // CHECK-LABEL: @test_vclz_s16(
1924 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1925 // CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
1926 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
1927 // CHECK: ret <4 x i16> [[VCLZ_V1_I]]
test_vclz_s16(int16x4_t a)1928 int16x4_t test_vclz_s16(int16x4_t a) {
1929 return vclz_s16(a);
1930 }
1931
1932 // CHECK-LABEL: @test_vclz_s32(
1933 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1934 // CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
1935 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
1936 // CHECK: ret <2 x i32> [[VCLZ_V1_I]]
test_vclz_s32(int32x2_t a)1937 int32x2_t test_vclz_s32(int32x2_t a) {
1938 return vclz_s32(a);
1939 }
1940
1941 // CHECK-LABEL: @test_vclz_u8(
1942 // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
1943 // CHECK: ret <8 x i8> [[VCLZ_V_I]]
test_vclz_u8(uint8x8_t a)1944 uint8x8_t test_vclz_u8(uint8x8_t a) {
1945 return vclz_u8(a);
1946 }
1947
1948 // CHECK-LABEL: @test_vclz_u16(
1949 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1950 // CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
1951 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
1952 // CHECK: ret <4 x i16> [[VCLZ_V1_I]]
test_vclz_u16(uint16x4_t a)1953 uint16x4_t test_vclz_u16(uint16x4_t a) {
1954 return vclz_u16(a);
1955 }
1956
1957 // CHECK-LABEL: @test_vclz_u32(
1958 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1959 // CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
1960 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
1961 // CHECK: ret <2 x i32> [[VCLZ_V1_I]]
test_vclz_u32(uint32x2_t a)1962 uint32x2_t test_vclz_u32(uint32x2_t a) {
1963 return vclz_u32(a);
1964 }
1965
1966 // CHECK-LABEL: @test_vclzq_s8(
1967 // CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
1968 // CHECK: ret <16 x i8> [[VCLZQ_V_I]]
test_vclzq_s8(int8x16_t a)1969 int8x16_t test_vclzq_s8(int8x16_t a) {
1970 return vclzq_s8(a);
1971 }
1972
1973 // CHECK-LABEL: @test_vclzq_s16(
1974 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1975 // CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
1976 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
1977 // CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
test_vclzq_s16(int16x8_t a)1978 int16x8_t test_vclzq_s16(int16x8_t a) {
1979 return vclzq_s16(a);
1980 }
1981
1982 // CHECK-LABEL: @test_vclzq_s32(
1983 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1984 // CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
1985 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
1986 // CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
test_vclzq_s32(int32x4_t a)1987 int32x4_t test_vclzq_s32(int32x4_t a) {
1988 return vclzq_s32(a);
1989 }
1990
1991 // CHECK-LABEL: @test_vclzq_u8(
1992 // CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
1993 // CHECK: ret <16 x i8> [[VCLZQ_V_I]]
test_vclzq_u8(uint8x16_t a)1994 uint8x16_t test_vclzq_u8(uint8x16_t a) {
1995 return vclzq_u8(a);
1996 }
1997
1998 // CHECK-LABEL: @test_vclzq_u16(
1999 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2000 // CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
2001 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
2002 // CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
test_vclzq_u16(uint16x8_t a)2003 uint16x8_t test_vclzq_u16(uint16x8_t a) {
2004 return vclzq_u16(a);
2005 }
2006
2007 // CHECK-LABEL: @test_vclzq_u32(
2008 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2009 // CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
2010 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
2011 // CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
test_vclzq_u32(uint32x4_t a)2012 uint32x4_t test_vclzq_u32(uint32x4_t a) {
2013 return vclzq_u32(a);
2014 }
2015
2016 // CHECK-LABEL: @test_vcnt_u8(
2017 // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
2018 // CHECK: ret <8 x i8> [[VCNT_V_I]]
test_vcnt_u8(uint8x8_t a)2019 uint8x8_t test_vcnt_u8(uint8x8_t a) {
2020 return vcnt_u8(a);
2021 }
2022
2023 // CHECK-LABEL: @test_vcnt_s8(
2024 // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
2025 // CHECK: ret <8 x i8> [[VCNT_V_I]]
test_vcnt_s8(int8x8_t a)2026 int8x8_t test_vcnt_s8(int8x8_t a) {
2027 return vcnt_s8(a);
2028 }
2029
2030 // CHECK-LABEL: @test_vcnt_p8(
2031 // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
2032 // CHECK: ret <8 x i8> [[VCNT_V_I]]
test_vcnt_p8(poly8x8_t a)2033 poly8x8_t test_vcnt_p8(poly8x8_t a) {
2034 return vcnt_p8(a);
2035 }
2036
2037 // CHECK-LABEL: @test_vcntq_u8(
2038 // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
2039 // CHECK: ret <16 x i8> [[VCNTQ_V_I]]
test_vcntq_u8(uint8x16_t a)2040 uint8x16_t test_vcntq_u8(uint8x16_t a) {
2041 return vcntq_u8(a);
2042 }
2043
2044 // CHECK-LABEL: @test_vcntq_s8(
2045 // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
2046 // CHECK: ret <16 x i8> [[VCNTQ_V_I]]
test_vcntq_s8(int8x16_t a)2047 int8x16_t test_vcntq_s8(int8x16_t a) {
2048 return vcntq_s8(a);
2049 }
2050
2051 // CHECK-LABEL: @test_vcntq_p8(
2052 // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
2053 // CHECK: ret <16 x i8> [[VCNTQ_V_I]]
test_vcntq_p8(poly8x16_t a)2054 poly8x16_t test_vcntq_p8(poly8x16_t a) {
2055 return vcntq_p8(a);
2056 }
2057
// VCOMBINE tests: concatenate two 64-bit D-register vectors into one 128-bit
// Q-register vector. Lowers to a single identity-order shufflevector.

// CHECK-LABEL: @test_vcombine_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
  return vcombine_s8(a, b);
}

// CHECK-LABEL: @test_vcombine_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
  return vcombine_s16(a, b);
}

// CHECK-LABEL: @test_vcombine_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
  return vcombine_s32(a, b);
}

// CHECK-LABEL: @test_vcombine_s64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
  return vcombine_s64(a, b);
}

// CHECK-LABEL: @test_vcombine_f16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
  return vcombine_f16(a, b);
}

// CHECK-LABEL: @test_vcombine_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
  return vcombine_f32(a, b);
}

// CHECK-LABEL: @test_vcombine_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
  return vcombine_u8(a, b);
}

// CHECK-LABEL: @test_vcombine_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
  return vcombine_u16(a, b);
}

// CHECK-LABEL: @test_vcombine_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
  return vcombine_u32(a, b);
}

// CHECK-LABEL: @test_vcombine_u64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
  return vcombine_u64(a, b);
}

// CHECK-LABEL: @test_vcombine_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
  return vcombine_p8(a, b);
}

// CHECK-LABEL: @test_vcombine_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
  return vcombine_p16(a, b);
}
2141
// VCREATE tests: build a 64-bit vector from a scalar u64. vcreate itself is
// just a bitcast; each test feeds the result through a second intrinsic
// (vclz/vcnt/vbsl/vadd) so the bitcast is observable in the checked IR.

// CHECK-LABEL: @test_vcreate_s8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
  return vclz_s8(vcreate_s8(a));
}

// A literal operand still goes through the i64 bitcast.
// CHECK-LABEL: @test_vcreate_imm
// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16>
// CHECK: ret <4 x i16> [[RES]]
int16x4_t test_vcreate_imm(void) {
  return vcreate_s16(0);
}

// CHECK-LABEL: @test_vcreate_s16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_s16(uint64_t a) {
  return vclz_s16(vcreate_s16(a));
}

// CHECK-LABEL: @test_vcreate_s32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_s32(uint64_t a) {
  return vclz_s32(vcreate_s32(a));
}

// CHECK-LABEL: @test_vcreate_f16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
  return vcreate_f16(a);
}

// CHECK-LABEL: @test_vcreate_f32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
  return vcreate_f32(a);
}

// NOTE: the u8/u16/u32 variants deliberately cast to the signed vector type
// and return the signed type, so the unsigned vcreate feeds a signed vclz.
// CHECK-LABEL: @test_vcreate_u8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_u8(uint64_t a) {
  return vclz_s8((int8x8_t)vcreate_u8(a));
}

// CHECK-LABEL: @test_vcreate_u16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_u16(uint64_t a) {
  return vclz_s16((int16x4_t)vcreate_u16(a));
}

// CHECK-LABEL: @test_vcreate_u32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_u32(uint64_t a) {
  return vclz_s32((int32x2_t)vcreate_u32(a));
}

// CHECK-LABEL: @test_vcreate_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vcreate_u64(uint64_t a) {
  uint64x1_t tmp = vcreate_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_p8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]])
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcreate_p8(uint64_t a) {
  return vcnt_p8(vcreate_p8(a));
}

// CHECK-LABEL: @test_vcreate_p16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP4]]
poly16x4_t test_vcreate_p16(uint64_t a) {
  poly16x4_t tmp = vcreate_p16(a);
  return vbsl_p16((uint16x4_t)tmp, tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vcreate_s64(uint64_t a) {
  int64x1_t tmp = vcreate_s64(a);
  return vadd_s64(tmp, tmp);
}
2257
// VCVT tests: conversions between float and integer vectors. Half<->float go
// through ARM-specific intrinsics (vcvtfp2hf/vcvthf2fp); int<->float use plain
// sitofp/uitofp/fptosi/fptoui; fixed-point forms use vcvtfx* with the
// fraction-bit count as the trailing immediate.

// CHECK-LABEL: @test_vcvt_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a)
// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}

// CHECK-LABEL: @test_vcvt_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
  return vcvt_f32_s32(a);
}

// CHECK-LABEL: @test_vcvt_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
  return vcvt_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
  return vcvtq_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
  return vcvtq_f32_u32(a);
}

// CHECK-LABEL: @test_vcvt_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VCVT_F32_F161_I]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}

// CHECK-LABEL: @test_vcvt_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvt_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 3);
}

// CHECK-LABEL: @test_vcvtq_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 3);
}

// CHECK-LABEL: @test_vcvt_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
  return vcvt_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
  return vcvtq_s32_f32(a);
}

// CHECK-LABEL: @test_vcvt_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
  return vcvt_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
  return vcvtq_u32_f32(a);
}
2413
// VDUP (lane) tests: broadcast one lane of a D-register vector across every
// lane of the result. Always lowers to a shufflevector with a splat mask;
// 16-bit and wider element types round-trip through <8 x i8> bitcasts first.

// CHECK-LABEL: @test_vdup_lane_u8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
  return vdup_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
  return vdup_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
  return vdup_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_s8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
  return vdup_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
  return vdup_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
  return vdup_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_p8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
  return vdup_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
  return vdup_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
  return vdup_lane_f32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_u8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
  return vdupq_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
  return vdupq_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
  return vdupq_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_s8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
  return vdupq_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
  return vdupq_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
  return vdupq_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_p8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
  return vdupq_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
  return vdupq_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
  return vdupq_lane_f32(a, 1);
}

// 64-bit lanes: only lane 0 exists, so the splat mask is zeroinitializer.
// CHECK-LABEL: @test_vdup_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
  return vdup_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdup_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
  return vdup_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
  return vdupq_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
  return vdupq_lane_u64(a, 0);
}
2599
// VDUP (scalar) tests: broadcast a scalar into every lane. Lowers to a chain
// of insertelement instructions, one per lane.

// CHECK-LABEL: @test_vdup_n_u8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vdup_n_u8(uint8_t a) {
  return vdup_n_u8(a);
}

// CHECK-LABEL: @test_vdup_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vdup_n_u16(uint16_t a) {
  return vdup_n_u16(a);
}

// CHECK-LABEL: @test_vdup_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vdup_n_u32(uint32_t a) {
  return vdup_n_u32(a);
}

// CHECK-LABEL: @test_vdup_n_s8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vdup_n_s8(int8_t a) {
  return vdup_n_s8(a);
}

// CHECK-LABEL: @test_vdup_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vdup_n_s16(int16_t a) {
  return vdup_n_s16(a);
}

// CHECK-LABEL: @test_vdup_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vdup_n_s32(int32_t a) {
  return vdup_n_s32(a);
}

// CHECK-LABEL: @test_vdup_n_p8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vdup_n_p8(poly8_t a) {
  return vdup_n_p8(a);
}

// CHECK-LABEL: @test_vdup_n_p16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vdup_n_p16(poly16_t a) {
  return vdup_n_p16(a);
}

// f16 variant takes a pointer so the scalar comes from a load (half is not a
// directly passable scalar under this ABI configuration).
// CHECK-LABEL: @test_vdup_n_f16(
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: ret <4 x half> [[VECINIT3]]
float16x4_t test_vdup_n_f16(float16_t *a) {
  return vdup_n_f16(*a);
}

// CHECK-LABEL: @test_vdup_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vdup_n_f32(float32_t a) {
  return vdup_n_f32(a);
}
2706
/* vdupq_n_* tests: same scalar-broadcast checks as vdup_n_*, but for the
   128-bit (q-register) vector types, so twice as many lanes are asserted. */

// CHECK-LABEL: @test_vdupq_n_u8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vdupq_n_u8(uint8_t a) {
  return vdupq_n_u8(a);
}

// CHECK-LABEL: @test_vdupq_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vdupq_n_u16(uint16_t a) {
  return vdupq_n_u16(a);
}

// CHECK-LABEL: @test_vdupq_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vdupq_n_u32(uint32_t a) {
  return vdupq_n_u32(a);
}

// CHECK-LABEL: @test_vdupq_n_s8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vdupq_n_s8(int8_t a) {
  return vdupq_n_s8(a);
}

// CHECK-LABEL: @test_vdupq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vdupq_n_s16(int16_t a) {
  return vdupq_n_s16(a);
}

// CHECK-LABEL: @test_vdupq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vdupq_n_s32(int32_t a) {
  return vdupq_n_s32(a);
}

// CHECK-LABEL: @test_vdupq_n_p8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vdupq_n_p8(poly8_t a) {
  return vdupq_n_p8(a);
}

// CHECK-LABEL: @test_vdupq_n_p16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vdupq_n_p16(poly16_t a) {
  return vdupq_n_p16(a);
}

// NOTE(review): as with vdup_n_f16, the scalar is passed by pointer and
// loaded (see the "load half" CHECK) rather than passed by value.
// CHECK-LABEL: @test_vdupq_n_f16(
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vdupq_n_f16(float16_t *a) {
  return vdupq_n_f16(*a);
}

// CHECK-LABEL: @test_vdupq_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vdupq_n_f32(float32_t a) {
  return vdupq_n_f32(a);
}
2859
/* 64-bit element dup tests.  Unlike the other vdup tests, these combine the
   dup with a vadd of the result with itself, and the CHECK lines assert the
   insertelement(s) feeding an add.
   NOTE(review): the extra vadd presumably exists so the dup result is
   consumed by a vector operation rather than returned directly — confirm
   against the test's history/generator before changing. */

// CHECK-LABEL: @test_vdup_n_s64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_s64(int64_t a) {
  int64x1_t tmp = vdup_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// Unsigned variant; the result is reinterpreted as int64x1_t so the same
// signed vadd_s64 can consume it.
// CHECK-LABEL: @test_vdup_n_u64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_u64(uint64_t a) {
  int64x1_t tmp = (int64x1_t)vdup_n_u64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_s64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vdupq_n_s64(int64_t a) {
  int64x2_t tmp = vdupq_n_s64(a);
  return vaddq_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_u64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vdupq_n_u64(uint64_t a) {
  uint64x2_t tmp = vdupq_n_u64(a);
  return vaddq_u64(tmp, tmp);
}
2897
/* veor/veorq tests: bitwise exclusive-or of two vectors.  Every variant,
   regardless of element type or signedness, must lower to a single IR
   `xor` on the whole vector. */

// CHECK-LABEL: @test_veor_s8(
// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[XOR_I]]
int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
  return veor_s8(a, b);
}

// CHECK-LABEL: @test_veor_s16(
// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[XOR_I]]
int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
  return veor_s16(a, b);
}

// CHECK-LABEL: @test_veor_s32(
// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[XOR_I]]
int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
  return veor_s32(a, b);
}

// CHECK-LABEL: @test_veor_s64(
// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[XOR_I]]
int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
  return veor_s64(a, b);
}

// CHECK-LABEL: @test_veor_u8(
// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[XOR_I]]
uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
  return veor_u8(a, b);
}

// CHECK-LABEL: @test_veor_u16(
// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[XOR_I]]
uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
  return veor_u16(a, b);
}

// CHECK-LABEL: @test_veor_u32(
// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[XOR_I]]
uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
  return veor_u32(a, b);
}

// CHECK-LABEL: @test_veor_u64(
// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[XOR_I]]
uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
  return veor_u64(a, b);
}

// CHECK-LABEL: @test_veorq_s8(
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
  return veorq_s8(a, b);
}

// CHECK-LABEL: @test_veorq_s16(
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
  return veorq_s16(a, b);
}

// CHECK-LABEL: @test_veorq_s32(
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
  return veorq_s32(a, b);
}

// CHECK-LABEL: @test_veorq_s64(
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
  return veorq_s64(a, b);
}

// CHECK-LABEL: @test_veorq_u8(
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
  return veorq_u8(a, b);
}

// CHECK-LABEL: @test_veorq_u16(
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
  return veorq_u16(a, b);
}

// CHECK-LABEL: @test_veorq_u32(
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
  return veorq_u32(a, b);
}

// CHECK-LABEL: @test_veorq_u64(
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
  return veorq_u64(a, b);
}
3009
/* vext_* (64-bit) tests: vector extract lowers to a shufflevector whose
   mask starts at the immediate lane index and runs contiguously into the
   second operand.  i8 variants shuffle directly; wider element types are
   round-tripped through <8 x i8> bitcasts first, as the CHECK lines show. */

// CHECK-LABEL: @test_vext_s8(
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
  return vext_s8(a, b, 7);
}

// CHECK-LABEL: @test_vext_u8(
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
  return vext_u8(a, b, 7);
}

// CHECK-LABEL: @test_vext_p8(
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
  return vext_p8(a, b, 7);
}

// CHECK-LABEL: @test_vext_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
  return vext_s16(a, b, 3);
}

// CHECK-LABEL: @test_vext_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
  return vext_u16(a, b, 3);
}

// CHECK-LABEL: @test_vext_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
  return vext_p16(a, b, 3);
}

// CHECK-LABEL: @test_vext_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
  return vext_s32(a, b, 1);
}

// CHECK-LABEL: @test_vext_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
  return vext_u32(a, b, 1);
}

// Single-element vectors only allow index 0, so the mask is all zeros.
// CHECK-LABEL: @test_vext_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
  return vext_s64(a, b, 0);
}

// CHECK-LABEL: @test_vext_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
  return vext_u64(a, b, 0);
}

// CHECK-LABEL: @test_vext_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x float> [[VEXT]]
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
  return vext_f32(a, b, 1);
}
3118
/* vextq_* (128-bit) tests: same shufflevector lowering as the vext_* tests
   above, but with 128-bit vectors and <16 x i8> bitcast round-trips for the
   wider element types. */

// CHECK-LABEL: @test_vextq_s8(
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
  return vextq_s8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_u8(
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
  return vextq_u8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_p8(
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
  return vextq_p8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
  return vextq_s16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
  return vextq_u16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
  return vextq_p16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
  return vextq_s32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
  return vextq_u32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
  return vextq_s64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
  return vextq_u64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x float> [[VEXT]]
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
  return vextq_f32(a, b, 3);
}
3227
/* vfma/vfms tests: fused multiply-add lowers to the @llvm.fma intrinsic
   with the accumulator (%a) as the third operand.  The vfms variants
   additionally assert an fneg of %b feeding the fma (a - b*c form). */

// CHECK-LABEL: @test_vfma_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfma_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmaq_f32(a, b, c);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfms_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %b
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmsq_f32(a, b, c);
}
3269
/* vget_high_* tests: extracting the upper half of a 128-bit vector lowers
   to a shufflevector selecting the upper lane indices (e.g. lanes 8..15 of
   a <16 x i8>). */

// CHECK-LABEL: @test_vget_high_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_high_s8(int8x16_t a) {
  return vget_high_s8(a);
}

// CHECK-LABEL: @test_vget_high_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_high_s16(int16x8_t a) {
  return vget_high_s16(a);
}

// CHECK-LABEL: @test_vget_high_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_high_s32(int32x4_t a) {
  return vget_high_s32(a);
}

// CHECK-LABEL: @test_vget_high_s64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_high_s64(int64x2_t a) {
  return vget_high_s64(a);
}

// CHECK-LABEL: @test_vget_high_f16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_high_f16(float16x8_t a) {
  return vget_high_f16(a);
}

// CHECK-LABEL: @test_vget_high_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_high_f32(float32x4_t a) {
  return vget_high_f32(a);
}

// CHECK-LABEL: @test_vget_high_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_high_u8(uint8x16_t a) {
  return vget_high_u8(a);
}
3318
3319 // CHECK-LABEL: @test_vget_high_u16(
3320 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3321 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vget_high_u16(uint16x8_t a)3322 uint16x4_t test_vget_high_u16(uint16x8_t a) {
3323 return vget_high_u16(a);
3324 }
3325
3326 // CHECK-LABEL: @test_vget_high_u32(
3327 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
3328 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
test_vget_high_u32(uint32x4_t a)3329 uint32x2_t test_vget_high_u32(uint32x4_t a) {
3330 return vget_high_u32(a);
3331 }
3332
3333 // CHECK-LABEL: @test_vget_high_u64(
3334 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
3335 // CHECK: ret <1 x i64> [[SHUFFLE_I]]
test_vget_high_u64(uint64x2_t a)3336 uint64x1_t test_vget_high_u64(uint64x2_t a) {
3337 return vget_high_u64(a);
3338 }
3339
3340 // CHECK-LABEL: @test_vget_high_p8(
3341 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3342 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vget_high_p8(poly8x16_t a)3343 poly8x8_t test_vget_high_p8(poly8x16_t a) {
3344 return vget_high_p8(a);
3345 }
3346
3347 // CHECK-LABEL: @test_vget_high_p16(
3348 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3349 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vget_high_p16(poly16x8_t a)3350 poly16x4_t test_vget_high_p16(poly16x8_t a) {
3351 return vget_high_p16(a);
3352 }
3353
3354 // CHECK-LABEL: @test_vget_lane_u8(
3355 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
3356 // CHECK: ret i8 [[VGET_LANE]]
test_vget_lane_u8(uint8x8_t a)3357 uint8_t test_vget_lane_u8(uint8x8_t a) {
3358 return vget_lane_u8(a, 7);
3359 }
3360
3361 // CHECK-LABEL: @test_vget_lane_u16(
3362 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
3363 // CHECK: ret i16 [[VGET_LANE]]
test_vget_lane_u16(uint16x4_t a)3364 uint16_t test_vget_lane_u16(uint16x4_t a) {
3365 return vget_lane_u16(a, 3);
3366 }
3367
3368 // CHECK-LABEL: @test_vget_lane_u32(
3369 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
3370 // CHECK: ret i32 [[VGET_LANE]]
test_vget_lane_u32(uint32x2_t a)3371 uint32_t test_vget_lane_u32(uint32x2_t a) {
3372 return vget_lane_u32(a, 1);
3373 }
3374
3375 // CHECK-LABEL: @test_vget_lane_s8(
3376 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
3377 // CHECK: ret i8 [[VGET_LANE]]
test_vget_lane_s8(int8x8_t a)3378 int8_t test_vget_lane_s8(int8x8_t a) {
3379 return vget_lane_s8(a, 7);
3380 }
3381
3382 // CHECK-LABEL: @test_vget_lane_s16(
3383 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
3384 // CHECK: ret i16 [[VGET_LANE]]
test_vget_lane_s16(int16x4_t a)3385 int16_t test_vget_lane_s16(int16x4_t a) {
3386 return vget_lane_s16(a, 3);
3387 }
3388
3389 // CHECK-LABEL: @test_vget_lane_s32(
3390 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
3391 // CHECK: ret i32 [[VGET_LANE]]
test_vget_lane_s32(int32x2_t a)3392 int32_t test_vget_lane_s32(int32x2_t a) {
3393 return vget_lane_s32(a, 1);
3394 }
3395
3396 // CHECK-LABEL: @test_vget_lane_p8(
3397 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
3398 // CHECK: ret i8 [[VGET_LANE]]
test_vget_lane_p8(poly8x8_t a)3399 poly8_t test_vget_lane_p8(poly8x8_t a) {
3400 return vget_lane_p8(a, 7);
3401 }
3402
3403 // CHECK-LABEL: @test_vget_lane_p16(
3404 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
3405 // CHECK: ret i16 [[VGET_LANE]]
test_vget_lane_p16(poly16x4_t a)3406 poly16_t test_vget_lane_p16(poly16x4_t a) {
3407 return vget_lane_p16(a, 3);
3408 }
3409
3410 // CHECK-LABEL: @test_vget_lane_f32(
3411 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
3412 // CHECK: ret float [[VGET_LANE]]
test_vget_lane_f32(float32x2_t a)3413 float32_t test_vget_lane_f32(float32x2_t a) {
3414 return vget_lane_f32(a, 1);
3415 }
3416
3417 // CHECK-LABEL: @test_vget_lane_f16(
3418 // CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
3419 // CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
3420 // CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
3421 // CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
3422 // CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
3423 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
3424 // CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
3425 // CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
3426 // CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
3427 // CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
3428 // CHECK: ret float [[CONV]]
test_vget_lane_f16(float16x4_t a)3429 float32_t test_vget_lane_f16(float16x4_t a) {
3430 return vget_lane_f16(a, 1);
3431 }
3432
3433 // CHECK-LABEL: @test_vgetq_lane_u8(
3434 // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
3435 // CHECK: ret i8 [[VGET_LANE]]
test_vgetq_lane_u8(uint8x16_t a)3436 uint8_t test_vgetq_lane_u8(uint8x16_t a) {
3437 return vgetq_lane_u8(a, 15);
3438 }
3439
3440 // CHECK-LABEL: @test_vgetq_lane_u16(
3441 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
3442 // CHECK: ret i16 [[VGET_LANE]]
test_vgetq_lane_u16(uint16x8_t a)3443 uint16_t test_vgetq_lane_u16(uint16x8_t a) {
3444 return vgetq_lane_u16(a, 7);
3445 }
3446
3447 // CHECK-LABEL: @test_vgetq_lane_u32(
3448 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
3449 // CHECK: ret i32 [[VGET_LANE]]
test_vgetq_lane_u32(uint32x4_t a)3450 uint32_t test_vgetq_lane_u32(uint32x4_t a) {
3451 return vgetq_lane_u32(a, 3);
3452 }
3453
3454 // CHECK-LABEL: @test_vgetq_lane_s8(
3455 // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
3456 // CHECK: ret i8 [[VGET_LANE]]
test_vgetq_lane_s8(int8x16_t a)3457 int8_t test_vgetq_lane_s8(int8x16_t a) {
3458 return vgetq_lane_s8(a, 15);
3459 }
3460
3461 // CHECK-LABEL: @test_vgetq_lane_s16(
3462 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
3463 // CHECK: ret i16 [[VGET_LANE]]
test_vgetq_lane_s16(int16x8_t a)3464 int16_t test_vgetq_lane_s16(int16x8_t a) {
3465 return vgetq_lane_s16(a, 7);
3466 }
3467
3468 // CHECK-LABEL: @test_vgetq_lane_s32(
3469 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
3470 // CHECK: ret i32 [[VGET_LANE]]
test_vgetq_lane_s32(int32x4_t a)3471 int32_t test_vgetq_lane_s32(int32x4_t a) {
3472 return vgetq_lane_s32(a, 3);
3473 }
3474
3475 // CHECK-LABEL: @test_vgetq_lane_p8(
3476 // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
3477 // CHECK: ret i8 [[VGET_LANE]]
test_vgetq_lane_p8(poly8x16_t a)3478 poly8_t test_vgetq_lane_p8(poly8x16_t a) {
3479 return vgetq_lane_p8(a, 15);
3480 }
3481
3482 // CHECK-LABEL: @test_vgetq_lane_p16(
3483 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
3484 // CHECK: ret i16 [[VGET_LANE]]
test_vgetq_lane_p16(poly16x8_t a)3485 poly16_t test_vgetq_lane_p16(poly16x8_t a) {
3486 return vgetq_lane_p16(a, 7);
3487 }
3488
3489 // CHECK-LABEL: @test_vgetq_lane_f32(
3490 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
3491 // CHECK: ret float [[VGET_LANE]]
test_vgetq_lane_f32(float32x4_t a)3492 float32_t test_vgetq_lane_f32(float32x4_t a) {
3493 return vgetq_lane_f32(a, 3);
3494 }
3495
3496 // CHECK-LABEL: @test_vgetq_lane_f16(
3497 // CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
3498 // CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
3499 // CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
3500 // CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
3501 // CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
3502 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
3503 // CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
3504 // CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
3505 // CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
3506 // CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
3507 // CHECK: ret float [[CONV]]
test_vgetq_lane_f16(float16x8_t a)3508 float32_t test_vgetq_lane_f16(float16x8_t a) {
3509 return vgetq_lane_f16(a, 3);
3510 }
3511
3512 // CHECK-LABEL: @test_vget_lane_s64(
3513 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
3514 // CHECK: ret i64 [[VGET_LANE]]
test_vget_lane_s64(int64x1_t a)3515 int64_t test_vget_lane_s64(int64x1_t a) {
3516 return vget_lane_s64(a, 0);
3517 }
3518
3519 // CHECK-LABEL: @test_vget_lane_u64(
3520 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
3521 // CHECK: ret i64 [[VGET_LANE]]
test_vget_lane_u64(uint64x1_t a)3522 uint64_t test_vget_lane_u64(uint64x1_t a) {
3523 return vget_lane_u64(a, 0);
3524 }
3525
3526 // CHECK-LABEL: @test_vgetq_lane_s64(
3527 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
3528 // CHECK: ret i64 [[VGET_LANE]]
test_vgetq_lane_s64(int64x2_t a)3529 int64_t test_vgetq_lane_s64(int64x2_t a) {
3530 return vgetq_lane_s64(a, 1);
3531 }
3532
3533 // CHECK-LABEL: @test_vgetq_lane_u64(
3534 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
3535 // CHECK: ret i64 [[VGET_LANE]]
test_vgetq_lane_u64(uint64x2_t a)3536 uint64_t test_vgetq_lane_u64(uint64x2_t a) {
3537 return vgetq_lane_u64(a, 1);
3538 }
3539
3540 // CHECK-LABEL: @test_vget_low_s8(
3541 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3542 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vget_low_s8(int8x16_t a)3543 int8x8_t test_vget_low_s8(int8x16_t a) {
3544 return vget_low_s8(a);
3545 }
3546
3547 // CHECK-LABEL: @test_vget_low_s16(
3548 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3549 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vget_low_s16(int16x8_t a)3550 int16x4_t test_vget_low_s16(int16x8_t a) {
3551 return vget_low_s16(a);
3552 }
3553
3554 // CHECK-LABEL: @test_vget_low_s32(
3555 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
3556 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
test_vget_low_s32(int32x4_t a)3557 int32x2_t test_vget_low_s32(int32x4_t a) {
3558 return vget_low_s32(a);
3559 }
3560
3561 // CHECK-LABEL: @test_vget_low_s64(
3562 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
3563 // CHECK: ret <1 x i64> [[SHUFFLE_I]]
test_vget_low_s64(int64x2_t a)3564 int64x1_t test_vget_low_s64(int64x2_t a) {
3565 return vget_low_s64(a);
3566 }
3567
3568 // CHECK-LABEL: @test_vget_low_f16(
3569 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3570 // CHECK: ret <4 x half> [[SHUFFLE_I]]
test_vget_low_f16(float16x8_t a)3571 float16x4_t test_vget_low_f16(float16x8_t a) {
3572 return vget_low_f16(a);
3573 }
3574
3575 // CHECK-LABEL: @test_vget_low_f32(
3576 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
3577 // CHECK: ret <2 x float> [[SHUFFLE_I]]
test_vget_low_f32(float32x4_t a)3578 float32x2_t test_vget_low_f32(float32x4_t a) {
3579 return vget_low_f32(a);
3580 }
3581
3582 // CHECK-LABEL: @test_vget_low_u8(
3583 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3584 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vget_low_u8(uint8x16_t a)3585 uint8x8_t test_vget_low_u8(uint8x16_t a) {
3586 return vget_low_u8(a);
3587 }
3588
3589 // CHECK-LABEL: @test_vget_low_u16(
3590 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3591 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vget_low_u16(uint16x8_t a)3592 uint16x4_t test_vget_low_u16(uint16x8_t a) {
3593 return vget_low_u16(a);
3594 }
3595
3596 // CHECK-LABEL: @test_vget_low_u32(
3597 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
3598 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
test_vget_low_u32(uint32x4_t a)3599 uint32x2_t test_vget_low_u32(uint32x4_t a) {
3600 return vget_low_u32(a);
3601 }
3602
3603 // CHECK-LABEL: @test_vget_low_u64(
3604 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
3605 // CHECK: ret <1 x i64> [[SHUFFLE_I]]
test_vget_low_u64(uint64x2_t a)3606 uint64x1_t test_vget_low_u64(uint64x2_t a) {
3607 return vget_low_u64(a);
3608 }
3609
3610 // CHECK-LABEL: @test_vget_low_p8(
3611 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3612 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vget_low_p8(poly8x16_t a)3613 poly8x8_t test_vget_low_p8(poly8x16_t a) {
3614 return vget_low_p8(a);
3615 }
3616
3617 // CHECK-LABEL: @test_vget_low_p16(
3618 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3619 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vget_low_p16(poly16x8_t a)3620 poly16x4_t test_vget_low_p16(poly16x8_t a) {
3621 return vget_low_p16(a);
3622 }
3623
3624 // CHECK-LABEL: @test_vhadd_s8(
3625 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
3626 // CHECK: ret <8 x i8> [[VHADD_V_I]]
test_vhadd_s8(int8x8_t a,int8x8_t b)3627 int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
3628 return vhadd_s8(a, b);
3629 }
3630
3631 // CHECK-LABEL: @test_vhadd_s16(
3632 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3633 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3634 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
3635 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
3636 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
test_vhadd_s16(int16x4_t a,int16x4_t b)3637 int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
3638 return vhadd_s16(a, b);
3639 }
3640
3641 // CHECK-LABEL: @test_vhadd_s32(
3642 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3643 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3644 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
3645 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
3646 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
test_vhadd_s32(int32x2_t a,int32x2_t b)3647 int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
3648 return vhadd_s32(a, b);
3649 }
3650
3651 // CHECK-LABEL: @test_vhadd_u8(
3652 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
3653 // CHECK: ret <8 x i8> [[VHADD_V_I]]
test_vhadd_u8(uint8x8_t a,uint8x8_t b)3654 uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
3655 return vhadd_u8(a, b);
3656 }
3657
3658 // CHECK-LABEL: @test_vhadd_u16(
3659 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3660 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3661 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
3662 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
3663 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
test_vhadd_u16(uint16x4_t a,uint16x4_t b)3664 uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
3665 return vhadd_u16(a, b);
3666 }
3667
3668 // CHECK-LABEL: @test_vhadd_u32(
3669 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3670 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3671 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
3672 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
3673 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
test_vhadd_u32(uint32x2_t a,uint32x2_t b)3674 uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
3675 return vhadd_u32(a, b);
3676 }
3677
3678 // CHECK-LABEL: @test_vhaddq_s8(
3679 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
3680 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
test_vhaddq_s8(int8x16_t a,int8x16_t b)3681 int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
3682 return vhaddq_s8(a, b);
3683 }
3684
3685 // CHECK-LABEL: @test_vhaddq_s16(
3686 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3687 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3688 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
3689 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
3690 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
test_vhaddq_s16(int16x8_t a,int16x8_t b)3691 int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
3692 return vhaddq_s16(a, b);
3693 }
3694
3695 // CHECK-LABEL: @test_vhaddq_s32(
3696 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3697 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3698 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
3699 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
3700 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
test_vhaddq_s32(int32x4_t a,int32x4_t b)3701 int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
3702 return vhaddq_s32(a, b);
3703 }
3704
3705 // CHECK-LABEL: @test_vhaddq_u8(
3706 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
3707 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
test_vhaddq_u8(uint8x16_t a,uint8x16_t b)3708 uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
3709 return vhaddq_u8(a, b);
3710 }
3711
3712 // CHECK-LABEL: @test_vhaddq_u16(
3713 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3714 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3715 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
3716 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
3717 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
test_vhaddq_u16(uint16x8_t a,uint16x8_t b)3718 uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
3719 return vhaddq_u16(a, b);
3720 }
3721
3722 // CHECK-LABEL: @test_vhaddq_u32(
3723 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3724 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3725 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
3726 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
3727 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
test_vhaddq_u32(uint32x4_t a,uint32x4_t b)3728 uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
3729 return vhaddq_u32(a, b);
3730 }
3731
3732 // CHECK-LABEL: @test_vhsub_s8(
3733 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b)
3734 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
test_vhsub_s8(int8x8_t a,int8x8_t b)3735 int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
3736 return vhsub_s8(a, b);
3737 }
3738
3739 // CHECK-LABEL: @test_vhsub_s16(
3740 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3741 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3742 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b)
3743 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
3744 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
test_vhsub_s16(int16x4_t a,int16x4_t b)3745 int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
3746 return vhsub_s16(a, b);
3747 }
3748
3749 // CHECK-LABEL: @test_vhsub_s32(
3750 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3751 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3752 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b)
3753 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
3754 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
test_vhsub_s32(int32x2_t a,int32x2_t b)3755 int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
3756 return vhsub_s32(a, b);
3757 }
3758
3759 // CHECK-LABEL: @test_vhsub_u8(
3760 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b)
3761 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
test_vhsub_u8(uint8x8_t a,uint8x8_t b)3762 uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
3763 return vhsub_u8(a, b);
3764 }
3765
3766 // CHECK-LABEL: @test_vhsub_u16(
3767 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3768 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3769 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b)
3770 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
3771 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
test_vhsub_u16(uint16x4_t a,uint16x4_t b)3772 uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
3773 return vhsub_u16(a, b);
3774 }
3775
3776 // CHECK-LABEL: @test_vhsub_u32(
3777 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3778 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3779 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b)
3780 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
3781 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
test_vhsub_u32(uint32x2_t a,uint32x2_t b)3782 uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
3783 return vhsub_u32(a, b);
3784 }
3785
3786 // CHECK-LABEL: @test_vhsubq_s8(
3787 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b)
3788 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
test_vhsubq_s8(int8x16_t a,int8x16_t b)3789 int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
3790 return vhsubq_s8(a, b);
3791 }
3792
3793 // CHECK-LABEL: @test_vhsubq_s16(
3794 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3795 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3796 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b)
3797 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
3798 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
test_vhsubq_s16(int16x8_t a,int16x8_t b)3799 int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
3800 return vhsubq_s16(a, b);
3801 }
3802
3803 // CHECK-LABEL: @test_vhsubq_s32(
3804 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3805 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3806 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b)
3807 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
3808 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
test_vhsubq_s32(int32x4_t a,int32x4_t b)3809 int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
3810 return vhsubq_s32(a, b);
3811 }
3812
3813 // CHECK-LABEL: @test_vhsubq_u8(
3814 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b)
3815 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
test_vhsubq_u8(uint8x16_t a,uint8x16_t b)3816 uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
3817 return vhsubq_u8(a, b);
3818 }
3819
3820 // CHECK-LABEL: @test_vhsubq_u16(
3821 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3822 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3823 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b)
3824 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
3825 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
test_vhsubq_u16(uint16x8_t a,uint16x8_t b)3826 uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
3827 return vhsubq_u16(a, b);
3828 }
3829
3830 // CHECK-LABEL: @test_vhsubq_u32(
3831 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3832 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3833 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b)
3834 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
3835 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
test_vhsubq_u32(uint32x4_t a,uint32x4_t b)3836 uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
3837 return vhsubq_u32(a, b);
3838 }
3839
3840 // CHECK-LABEL: @test_vld1q_u8(
3841 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
3842 // CHECK: ret <16 x i8> [[VLD1]]
test_vld1q_u8(uint8_t const * a)3843 uint8x16_t test_vld1q_u8(uint8_t const * a) {
3844 return vld1q_u8(a);
3845 }
3846
3847 // CHECK-LABEL: @test_vld1q_u16(
3848 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3849 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
3850 // CHECK: ret <8 x i16> [[VLD1]]
test_vld1q_u16(uint16_t const * a)3851 uint16x8_t test_vld1q_u16(uint16_t const * a) {
3852 return vld1q_u16(a);
3853 }
3854
3855 // CHECK-LABEL: @test_vld1q_u32(
3856 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
3857 // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
3858 // CHECK: ret <4 x i32> [[VLD1]]
test_vld1q_u32(uint32_t const * a)3859 uint32x4_t test_vld1q_u32(uint32_t const * a) {
3860 return vld1q_u32(a);
3861 }
3862
3863 // CHECK-LABEL: @test_vld1q_u64(
3864 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
3865 // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
3866 // CHECK: ret <2 x i64> [[VLD1]]
test_vld1q_u64(uint64_t const * a)3867 uint64x2_t test_vld1q_u64(uint64_t const * a) {
3868 return vld1q_u64(a);
3869 }
3870
3871 // CHECK-LABEL: @test_vld1q_s8(
3872 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
3873 // CHECK: ret <16 x i8> [[VLD1]]
test_vld1q_s8(int8_t const * a)3874 int8x16_t test_vld1q_s8(int8_t const * a) {
3875 return vld1q_s8(a);
3876 }
3877
3878 // CHECK-LABEL: @test_vld1q_s16(
3879 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3880 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
3881 // CHECK: ret <8 x i16> [[VLD1]]
test_vld1q_s16(int16_t const * a)3882 int16x8_t test_vld1q_s16(int16_t const * a) {
3883 return vld1q_s16(a);
3884 }
3885
3886 // CHECK-LABEL: @test_vld1q_s32(
3887 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
3888 // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
3889 // CHECK: ret <4 x i32> [[VLD1]]
test_vld1q_s32(int32_t const * a)3890 int32x4_t test_vld1q_s32(int32_t const * a) {
3891 return vld1q_s32(a);
3892 }
3893
3894 // CHECK-LABEL: @test_vld1q_s64(
3895 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
3896 // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
3897 // CHECK: ret <2 x i64> [[VLD1]]
test_vld1q_s64(int64_t const * a)3898 int64x2_t test_vld1q_s64(int64_t const * a) {
3899 return vld1q_s64(a);
3900 }
3901
3902 // CHECK-LABEL: @test_vld1q_f16(
3903 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
3904 // CHECK: [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0i8(i8* [[TMP0]], i32 2)
3905 // CHECK: ret <8 x half> [[VLD1]]
test_vld1q_f16(float16_t const * a)3906 float16x8_t test_vld1q_f16(float16_t const * a) {
3907 return vld1q_f16(a);
3908 }
3909
3910 // CHECK-LABEL: @test_vld1q_f32(
3911 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
3912 // CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4)
3913 // CHECK: ret <4 x float> [[VLD1]]
test_vld1q_f32(float32_t const * a)3914 float32x4_t test_vld1q_f32(float32_t const * a) {
3915 return vld1q_f32(a);
3916 }
3917
3918 // CHECK-LABEL: @test_vld1q_p8(
3919 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
3920 // CHECK: ret <16 x i8> [[VLD1]]
test_vld1q_p8(poly8_t const * a)3921 poly8x16_t test_vld1q_p8(poly8_t const * a) {
3922 return vld1q_p8(a);
3923 }
3924
3925 // CHECK-LABEL: @test_vld1q_p16(
3926 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3927 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
3928 // CHECK: ret <8 x i16> [[VLD1]]
test_vld1q_p16(poly16_t const * a)3929 poly16x8_t test_vld1q_p16(poly16_t const * a) {
3930 return vld1q_p16(a);
3931 }
3932
3933 // CHECK-LABEL: @test_vld1_u8(
3934 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
3935 // CHECK: ret <8 x i8> [[VLD1]]
test_vld1_u8(uint8_t const * a)3936 uint8x8_t test_vld1_u8(uint8_t const * a) {
3937 return vld1_u8(a);
3938 }
3939
3940 // CHECK-LABEL: @test_vld1_u16(
3941 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3942 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
3943 // CHECK: ret <4 x i16> [[VLD1]]
test_vld1_u16(uint16_t const * a)3944 uint16x4_t test_vld1_u16(uint16_t const * a) {
3945 return vld1_u16(a);
3946 }
3947
3948 // CHECK-LABEL: @test_vld1_u32(
3949 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
3950 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
3951 // CHECK: ret <2 x i32> [[VLD1]]
test_vld1_u32(uint32_t const * a)3952 uint32x2_t test_vld1_u32(uint32_t const * a) {
3953 return vld1_u32(a);
3954 }
3955
3956 // CHECK-LABEL: @test_vld1_u64(
3957 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
3958 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
3959 // CHECK: ret <1 x i64> [[VLD1]]
test_vld1_u64(uint64_t const * a)3960 uint64x1_t test_vld1_u64(uint64_t const * a) {
3961 return vld1_u64(a);
3962 }
3963
3964 // CHECK-LABEL: @test_vld1_s8(
3965 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
3966 // CHECK: ret <8 x i8> [[VLD1]]
test_vld1_s8(int8_t const * a)3967 int8x8_t test_vld1_s8(int8_t const * a) {
3968 return vld1_s8(a);
3969 }
3970
3971 // CHECK-LABEL: @test_vld1_s16(
3972 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
3973 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
3974 // CHECK: ret <4 x i16> [[VLD1]]
test_vld1_s16(int16_t const * a)3975 int16x4_t test_vld1_s16(int16_t const * a) {
3976 return vld1_s16(a);
3977 }
3978
3979 // CHECK-LABEL: @test_vld1_s32(
3980 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
3981 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
3982 // CHECK: ret <2 x i32> [[VLD1]]
test_vld1_s32(int32_t const * a)3983 int32x2_t test_vld1_s32(int32_t const * a) {
3984 return vld1_s32(a);
3985 }
3986
3987 // CHECK-LABEL: @test_vld1_s64(
3988 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
3989 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
3990 // CHECK: ret <1 x i64> [[VLD1]]
test_vld1_s64(int64_t const * a)3991 int64x1_t test_vld1_s64(int64_t const * a) {
3992 return vld1_s64(a);
3993 }
3994
3995 // CHECK-LABEL: @test_vld1_f16(
3996 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
3997 // CHECK: [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0i8(i8* [[TMP0]], i32 2)
3998 // CHECK: ret <4 x half> [[VLD1]]
test_vld1_f16(float16_t const * a)3999 float16x4_t test_vld1_f16(float16_t const * a) {
4000 return vld1_f16(a);
4001 }
4002
4003 // CHECK-LABEL: @test_vld1_f32(
4004 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
4005 // CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4)
4006 // CHECK: ret <2 x float> [[VLD1]]
test_vld1_f32(float32_t const * a)4007 float32x2_t test_vld1_f32(float32_t const * a) {
4008 return vld1_f32(a);
4009 }
4010
4011 // CHECK-LABEL: @test_vld1_p8(
4012 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
4013 // CHECK: ret <8 x i8> [[VLD1]]
test_vld1_p8(poly8_t const * a)4014 poly8x8_t test_vld1_p8(poly8_t const * a) {
4015 return vld1_p8(a);
4016 }
4017
4018 // CHECK-LABEL: @test_vld1_p16(
4019 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
4020 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
4021 // CHECK: ret <4 x i16> [[VLD1]]
test_vld1_p16(poly16_t const * a)4022 poly16x4_t test_vld1_p16(poly16_t const * a) {
4023 return vld1_p16(a);
4024 }
4025
// Section: vld1_dup / vld1q_dup load-and-splat tests.  Each intrinsic lowers
// to a scalar load, an insertelement into lane 0, and a shufflevector with an
// all-zero mask that broadcasts lane 0 to every lane.  CHECK lines are
// machine-generated FileCheck directives; do not hand-edit.
// CHECK-LABEL: @test_vld1q_dup_u8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
  return vld1q_dup_u8(a);
}

// CHECK-LABEL: @test_vld1q_dup_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
  return vld1q_dup_u16(a);
}

// CHECK-LABEL: @test_vld1q_dup_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
  return vld1q_dup_u32(a);
}

// CHECK-LABEL: @test_vld1q_dup_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
  return vld1q_dup_u64(a);
}

// CHECK-LABEL: @test_vld1q_dup_s8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
  return vld1q_dup_s8(a);
}

// CHECK-LABEL: @test_vld1q_dup_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
  return vld1q_dup_s16(a);
}

// CHECK-LABEL: @test_vld1q_dup_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
  return vld1q_dup_s32(a);
}

// CHECK-LABEL: @test_vld1q_dup_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
  return vld1q_dup_s64(a);
}

// CHECK-LABEL: @test_vld1q_dup_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK: [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x half> undef, half [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x half> [[LANE]]
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
  return vld1q_dup_f16(a);
}

// CHECK-LABEL: @test_vld1q_dup_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
  return vld1q_dup_f32(a);
}

// CHECK-LABEL: @test_vld1q_dup_p8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
  return vld1q_dup_p8(a);
}

// CHECK-LABEL: @test_vld1q_dup_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
  return vld1q_dup_p16(a);
}

// CHECK-LABEL: @test_vld1_dup_u8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
  return vld1_dup_u8(a);
}

// CHECK-LABEL: @test_vld1_dup_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
  return vld1_dup_u16(a);
}

// CHECK-LABEL: @test_vld1_dup_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
  return vld1_dup_u32(a);
}

// CHECK-LABEL: @test_vld1_dup_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
  return vld1_dup_u64(a);
}

// CHECK-LABEL: @test_vld1_dup_s8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
int8x8_t test_vld1_dup_s8(int8_t const * a) {
  return vld1_dup_s8(a);
}

// CHECK-LABEL: @test_vld1_dup_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vld1_dup_s16(int16_t const * a) {
  return vld1_dup_s16(a);
}

// CHECK-LABEL: @test_vld1_dup_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vld1_dup_s32(int32_t const * a) {
  return vld1_dup_s32(a);
}

// CHECK-LABEL: @test_vld1_dup_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vld1_dup_s64(int64_t const * a) {
  return vld1_dup_s64(a);
}

// CHECK-LABEL: @test_vld1_dup_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK: [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x half> undef, half [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x half> [[LANE]]
float16x4_t test_vld1_dup_f16(float16_t const * a) {
  return vld1_dup_f16(a);
}

// CHECK-LABEL: @test_vld1_dup_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vld1_dup_f32(float32_t const * a) {
  return vld1_dup_f32(a);
}

// CHECK-LABEL: @test_vld1_dup_p8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
  return vld1_dup_p8(a);
}

// CHECK-LABEL: @test_vld1_dup_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
  return vld1_dup_p16(a);
}
4277
// Section: vld1_lane / vld1q_lane lane-insert tests.  Each intrinsic loads a
// single scalar and inserts it at the highest lane index of the input vector.
// For 8/16/32-bit elements this is a plain load + insertelement; for 64-bit
// lanes clang instead emits a shuffle + llvm.arm.neon.vld1 — presumably
// because a direct 64-bit lane access cannot assume 8-byte alignment under
// this ABI (TODO confirm in CGBuiltin).  CHECK lines are machine-generated.
// CHECK-LABEL: @test_vld1q_lane_u8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
  return vld1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
  return vld1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
  return vld1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
  return vld1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_s8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
  return vld1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
  return vld1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
  return vld1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
  return vld1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK: [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
// CHECK: ret <8 x half> [[VLD1_LANE]]
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
  return vld1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
// CHECK: ret <4 x float> [[VLD1_LANE]]
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
  return vld1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_p8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
  return vld1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
  return vld1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_u8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
  return vld1_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
  return vld1_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_s8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
  return vld1_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
  return vld1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_f16(
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK: [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
// CHECK: ret <4 x half> [[VLD1_LANE]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
  return vld1_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
// CHECK: ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
  return vld1_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_p8(
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
  return vld1_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
  return vld1_lane_p16(a, b, 3);
}
4541
4542 // CHECK-LABEL: @test_vld2q_u8(
4543 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
4544 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
4545 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
test_vld2q_u8(uint8_t const * a)4546 uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
4547 return vld2q_u8(a);
4548 }
4549
4550 // CHECK-LABEL: @test_vld2q_u16(
4551 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
4552 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
4553 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4554 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
test_vld2q_u16(uint16_t const * a)4555 uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
4556 return vld2q_u16(a);
4557 }
4558
4559 // CHECK-LABEL: @test_vld2q_u32(
4560 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
4561 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
4562 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4563 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
test_vld2q_u32(uint32_t const * a)4564 uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
4565 return vld2q_u32(a);
4566 }
4567
4568 // CHECK-LABEL: @test_vld2q_s8(
4569 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
4570 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
4571 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
test_vld2q_s8(int8_t const * a)4572 int8x16x2_t test_vld2q_s8(int8_t const * a) {
4573 return vld2q_s8(a);
4574 }
4575
4576 // CHECK-LABEL: @test_vld2q_s16(
4577 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
4578 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
4579 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4580 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
// Codegen check: vld2q_s16 loads two <8 x i16> vectors; IR matched above.
int16x8x2_t test_vld2q_s16(int16_t const * a) {
  return vld2q_s16(a);
}
4584
4585 // CHECK-LABEL: @test_vld2q_s32(
4586 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
4587 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
4588 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4589 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
// Codegen check: vld2q_s32 loads two <4 x i32> vectors; IR matched above.
int32x4x2_t test_vld2q_s32(int32_t const * a) {
  return vld2q_s32(a);
}
4593
4594 // CHECK-LABEL: @test_vld2q_f16(
4595 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
4596 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
4597 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
4598 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half>
// Codegen check: vld2q_f16 loads two <8 x half> vectors (requires the
// +fullfp16 feature from the RUN line); IR matched above.
float16x8x2_t test_vld2q_f16(float16_t const * a) {
  return vld2q_f16(a);
}
4602
4603 // CHECK-LABEL: @test_vld2q_f32(
4604 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
4605 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
4606 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
4607 // CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float>
// Codegen check: vld2q_f32 loads two <4 x float> vectors; IR matched above.
float32x4x2_t test_vld2q_f32(float32_t const * a) {
  return vld2q_f32(a);
}
4611
4612 // CHECK-LABEL: @test_vld2q_p8(
4613 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
4614 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
4615 // CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
// Codegen check: vld2q_p8 (polynomial) shares the <16 x i8> vld2q lowering;
// IR matched above.
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
  return vld2q_p8(a);
}
4619
4620 // CHECK-LABEL: @test_vld2q_p16(
4621 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
4622 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
4623 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4624 // CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
// Codegen check: vld2q_p16 (polynomial) shares the <8 x i16> vld2q lowering;
// IR matched above.
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
  return vld2q_p16(a);
}
4628
4629 // CHECK-LABEL: @test_vld2_u8(
4630 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
4631 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
4632 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
// Codegen check: 64-bit variant — vld2_u8 loads two <8 x i8> vectors;
// IR matched above.
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
  return vld2_u8(a);
}
4636
4637 // CHECK-LABEL: @test_vld2_u16(
4638 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
4639 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
4640 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4641 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
// Codegen check: vld2_u16 loads two <4 x i16> vectors; IR matched above.
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
  return vld2_u16(a);
}
4645
4646 // CHECK-LABEL: @test_vld2_u32(
4647 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
4648 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
4649 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4650 // CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
// Codegen check: vld2_u32 loads two <2 x i32> vectors; IR matched above.
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
  return vld2_u32(a);
}
4654
4655 // CHECK-LABEL: @test_vld2_u64(
4656 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
4657 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
4658 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
4659 // CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
// Codegen check: vld2_u64 loads two <1 x i64> vectors (no q-form for 64-bit
// lanes with vld2 lane ops); IR matched above.
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
  return vld2_u64(a);
}
4663
4664 // CHECK-LABEL: @test_vld2_s8(
4665 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
4666 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
4667 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
// Codegen check: vld2_s8 loads two <8 x i8> vectors; IR matched above.
int8x8x2_t test_vld2_s8(int8_t const * a) {
  return vld2_s8(a);
}
4671
4672 // CHECK-LABEL: @test_vld2_s16(
4673 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
4674 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
4675 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4676 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
// Codegen check: vld2_s16 loads two <4 x i16> vectors; IR matched above.
int16x4x2_t test_vld2_s16(int16_t const * a) {
  return vld2_s16(a);
}
4680
4681 // CHECK-LABEL: @test_vld2_s32(
4682 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
4683 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
4684 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
4685 // CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
// Codegen check: vld2_s32 loads two <2 x i32> vectors; IR matched above.
int32x2x2_t test_vld2_s32(int32_t const * a) {
  return vld2_s32(a);
}
4689
4690 // CHECK-LABEL: @test_vld2_s64(
4691 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
4692 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
4693 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
4694 // CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
// Codegen check: vld2_s64 loads two <1 x i64> vectors; IR matched above.
int64x1x2_t test_vld2_s64(int64_t const * a) {
  return vld2_s64(a);
}
4698
4699 // CHECK-LABEL: @test_vld2_f16(
4700 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
4701 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
4702 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
4703 // CHECK: [[VLD2_V:%.*]] = call { <4 x half>, <4 x half>
// Codegen check: vld2_f16 loads two <4 x half> vectors; IR matched above.
float16x4x2_t test_vld2_f16(float16_t const * a) {
  return vld2_f16(a);
}
4707
4708 // CHECK-LABEL: @test_vld2_f32(
4709 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
4710 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
4711 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
4712 // CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float>
// Codegen check: vld2_f32 loads two <2 x float> vectors; IR matched above.
float32x2x2_t test_vld2_f32(float32_t const * a) {
  return vld2_f32(a);
}
4716
4717 // CHECK-LABEL: @test_vld2_p8(
4718 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
4719 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
4720 // CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
// Codegen check: vld2_p8 (polynomial) shares the <8 x i8> vld2 lowering;
// IR matched above.
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
  return vld2_p8(a);
}
4724
4725 // CHECK-LABEL: @test_vld2_p16(
4726 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
4727 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
4728 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
4729 // CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
// Codegen check: vld2_p16 (polynomial) shares the <4 x i16> vld2 lowering;
// IR matched above.
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
  return vld2_p16(a);
}
4733
4734 // CHECK-LABEL: @test_vld2q_lane_u16(
4735 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
4736 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
4737 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
4738 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
4739 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
4740 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4741 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
4742 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
4743 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4744 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
4745 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
4746 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
4747 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
4748 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
4749 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
4750 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
4751 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
4752 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
4753 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
4754 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
4755 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
4756 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
// Codegen check: vld2q_lane_u16 with the maximum lane index (7 for 8 x u16);
// the aggregate argument is passed coerced as [4 x i64] per the apcs-gnu ABI,
// as matched by the CHECK lines above.
uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
  return vld2q_lane_u16(a, b, 7);
}
4760
4761 // CHECK-LABEL: @test_vld2q_lane_u32(
4762 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
4763 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
4764 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
4765 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
4766 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
4767 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4768 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
4769 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
4770 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4771 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
4772 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
4773 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
4774 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
4775 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
4776 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
4777 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
4778 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
4779 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
4780 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
4781 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
4782 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
4783 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
// Codegen check: vld2q_lane_u32 with the maximum lane index (3 for 4 x u32);
// IR matched above.
uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
  return vld2q_lane_u32(a, b, 3);
}
4787
4788 // CHECK-LABEL: @test_vld2q_lane_s16(
4789 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
4790 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
4791 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
4792 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
4793 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
4794 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4795 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
4796 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
4797 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4798 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
4799 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
4800 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
4801 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
4802 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
4803 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
4804 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
4805 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
4806 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
4807 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
4808 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
4809 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
4810 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
// Codegen check: vld2q_lane_s16 with the maximum lane index (7 for 8 x s16);
// IR matched above.
int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
  return vld2q_lane_s16(a, b, 7);
}
4814
4815 // CHECK-LABEL: @test_vld2q_lane_s32(
4816 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
4817 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
4818 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
4819 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
4820 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
4821 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4822 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
4823 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
4824 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4825 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
4826 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
4827 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
4828 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
4829 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
4830 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
4831 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
4832 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
4833 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
4834 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
4835 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
4836 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
4837 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
// Codegen check: vld2q_lane_s32 with the maximum lane index (3 for 4 x s32);
// IR matched above.
int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
  return vld2q_lane_s32(a, b, 3);
}
4841
4842 // CHECK-LABEL: @test_vld2q_lane_f16(
4843 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
4844 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
4845 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
4846 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
4847 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
4848 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4849 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
4850 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
4851 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4852 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
4853 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
4854 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
4855 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
4856 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
4857 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
4858 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
4859 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
4860 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
4861 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
4862 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
4863 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
4864 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>
// Codegen check: vld2q_lane_f16 with the maximum lane index (7 for 8 x f16);
// IR matched above.
float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
  return vld2q_lane_f16(a, b, 7);
}
4868
4869 // CHECK-LABEL: @test_vld2q_lane_f32(
4870 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
4871 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
4872 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
4873 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
4874 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
4875 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4876 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
4877 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
4878 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4879 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
4880 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
4881 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
4882 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
4883 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
4884 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
4885 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
4886 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
4887 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
4888 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
4889 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
4890 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
4891 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>
// Codegen check: vld2q_lane_f32 with the maximum lane index (3 for 4 x f32);
// IR matched above.
float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
  return vld2q_lane_f32(a, b, 3);
}
4895
4896 // CHECK-LABEL: @test_vld2q_lane_p16(
4897 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
4898 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
4899 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
4900 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
4901 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
4902 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
4903 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
4904 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
4905 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
4906 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
4907 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
4908 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
4909 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
4910 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
4911 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
4912 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
4913 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
4914 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
4915 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
4916 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
4917 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
4918 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
// Codegen check: vld2q_lane_p16 with the maximum lane index (7 for 8 x p16);
// IR matched above.
poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
  return vld2q_lane_p16(a, b, 7);
}
4922
4923 // CHECK-LABEL: @test_vld2_lane_u8(
4924 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
4925 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
4926 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
4927 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
4928 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
4929 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
4930 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
4931 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
4932 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
4933 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
4934 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
4935 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
4936 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
4937 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
4938 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
4939 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
4940 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
// Codegen check: vld2_lane_u8 with the maximum lane index (7 for 8 x u8);
// the aggregate argument is coerced as [2 x i64] for the 64-bit d-register
// form, as matched by the CHECK lines above.
uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
  return vld2_lane_u8(a, b, 7);
}
4944
4945 // CHECK-LABEL: @test_vld2_lane_u16(
4946 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
4947 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
4948 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
4949 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
4950 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
4951 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
4952 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
4953 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
4954 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
4955 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
4956 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
4957 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
4958 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
4959 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
4960 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
4961 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
4962 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
4963 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
4964 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
4965 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
4966 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
4967 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
// Codegen check: vld2_lane_u16 with the maximum lane index (3 for 4 x u16);
// IR matched above.
uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
  return vld2_lane_u16(a, b, 3);
}
4971
4972 // CHECK-LABEL: @test_vld2_lane_u32(
4973 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
4974 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
4975 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
4976 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
4977 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
4978 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
4979 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
4980 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
4981 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
4982 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
4983 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
4984 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
4985 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
4986 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
4987 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
4988 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
4989 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
4990 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
4991 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
4992 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
4993 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
4994 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
// Codegen check: vld2_lane_u32 with the maximum lane index (1 for 2 x u32);
// IR matched above.
uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
  return vld2_lane_u32(a, b, 1);
}
4998
4999 // CHECK-LABEL: @test_vld2_lane_s8(
5000 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
5001 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
5002 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
5003 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
5004 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
5005 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5006 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
5007 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
5008 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5009 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
5010 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
5011 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
5012 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5013 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
5014 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5015 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5016 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
// Codegen check: vld2_lane_s8 with the maximum lane index (7 for 8 x s8);
// IR matched above.
int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
  return vld2_lane_s8(a, b, 7);
}
5020
5021 // CHECK-LABEL: @test_vld2_lane_s16(
5022 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
5023 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
5024 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
5025 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
5026 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5027 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5028 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
5029 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
5030 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5031 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
5032 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5033 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
5034 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5035 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5036 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5037 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
5038 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5039 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5040 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5041 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5042 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5043 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
// Codegen check: vld2_lane_s16 with the maximum lane index (3 for 4 x s16);
// IR matched above.
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
  return vld2_lane_s16(a, b, 3);
}
5047
5048 // CHECK-LABEL: @test_vld2_lane_s32(
5049 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
5050 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
5051 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
5052 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
5053 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
5054 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5055 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
5056 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
5057 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5058 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
5059 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5060 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
5061 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
5062 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5063 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5064 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
5065 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5066 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5067 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5068 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5069 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5070 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
// Exercises vld2_lane_s32 with the maximum lane index (1) for a 2x32-bit
// vector; the CHECK lines above pin the IR clang emits for this call.
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
  return vld2_lane_s32(a, b, 1);
}
5074
5075 // CHECK-LABEL: @test_vld2_lane_f16(
5076 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
5077 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
5078 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
5079 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
5080 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
5081 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5082 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
5083 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
5084 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5085 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
5086 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5087 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
5088 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
5089 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
5090 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
5091 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
5092 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
5093 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
5094 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
5095 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
5096 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
5097 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half>
// Exercises vld2_lane_f16 with the maximum lane index (3) for a 4x half
// vector; the CHECK lines above pin the IR clang emits for this call.
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
  return vld2_lane_f16(a, b, 3);
}
5101
5102 // CHECK-LABEL: @test_vld2_lane_f32(
5103 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
5104 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
5105 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
5106 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
5107 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
5108 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5109 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
5110 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
5111 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5112 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
5113 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5114 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
5115 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
5116 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
5117 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
5118 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
5119 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
5120 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
5121 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
5122 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
5123 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
5124 // CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float>
// Exercises vld2_lane_f32 with the maximum lane index (1) for a 2x float
// vector; the CHECK lines above pin the IR clang emits for this call.
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
  return vld2_lane_f32(a, b, 1);
}
5128
5129 // CHECK-LABEL: @test_vld2_lane_p8(
5130 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
5131 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
5132 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
5133 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
5134 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
5135 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5136 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
5137 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
5138 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5139 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
5140 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
5141 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
5142 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5143 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
5144 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5145 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5146 // CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
// Exercises vld2_lane_p8 with the maximum lane index (7) for an 8x8-bit
// poly vector; the CHECK lines above pin the IR clang emits for this call.
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
  return vld2_lane_p8(a, b, 7);
}
5150
5151 // CHECK-LABEL: @test_vld2_lane_p16(
5152 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
5153 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
5154 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
5155 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
5156 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
5157 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
5158 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
5159 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
5160 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
5161 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
5162 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5163 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
5164 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
5165 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5166 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5167 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
5168 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5169 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5170 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5171 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5172 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5173 // CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
// Exercises vld2_lane_p16 with the maximum lane index (3) for a 4x16-bit
// poly vector; the CHECK lines above pin the IR clang emits for this call.
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
  return vld2_lane_p16(a, b, 3);
}
5177
5178 // CHECK-LABEL: @test_vld3q_u8(
5179 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
5180 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
5181 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
// Exercises the quad-register 3-element load vld3q_u8; the CHECK lines
// above pin the IR clang emits (sret alloca plus a 3-result struct call).
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
  return vld3q_u8(a);
}
5185
5186 // CHECK-LABEL: @test_vld3q_u16(
5187 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
5188 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
5189 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5190 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// Exercises the quad-register 3-element load vld3q_u16; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
  return vld3q_u16(a);
}
5194
5195 // CHECK-LABEL: @test_vld3q_u32(
5196 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
5197 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
5198 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5199 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// Exercises the quad-register 3-element load vld3q_u32; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
  return vld3q_u32(a);
}
5203
5204 // CHECK-LABEL: @test_vld3q_s8(
5205 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
5206 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
5207 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
// Exercises the quad-register 3-element load vld3q_s8; the CHECK lines
// above pin the IR clang emits (sret alloca plus a 3-result struct call).
int8x16x3_t test_vld3q_s8(int8_t const * a) {
  return vld3q_s8(a);
}
5211
5212 // CHECK-LABEL: @test_vld3q_s16(
5213 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
5214 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
5215 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5216 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// Exercises the quad-register 3-element load vld3q_s16; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
int16x8x3_t test_vld3q_s16(int16_t const * a) {
  return vld3q_s16(a);
}
5220
5221 // CHECK-LABEL: @test_vld3q_s32(
5222 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
5223 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
5224 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5225 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// Exercises the quad-register 3-element load vld3q_s32; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
int32x4x3_t test_vld3q_s32(int32_t const * a) {
  return vld3q_s32(a);
}
5229
5230 // CHECK-LABEL: @test_vld3q_f16(
5231 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
5232 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
5233 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5234 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
// Exercises the quad-register 3-element load vld3q_f16; the CHECK lines
// above pin the IR clang emits (half* bitcast plus a 3-result struct call).
float16x8x3_t test_vld3q_f16(float16_t const * a) {
  return vld3q_f16(a);
}
5238
5239 // CHECK-LABEL: @test_vld3q_f32(
5240 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
5241 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
5242 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
5243 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
// Exercises the quad-register 3-element load vld3q_f32; the CHECK lines
// above pin the IR clang emits (float* bitcast plus a 3-result struct call).
float32x4x3_t test_vld3q_f32(float32_t const * a) {
  return vld3q_f32(a);
}
5247
5248 // CHECK-LABEL: @test_vld3q_p8(
5249 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
5250 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
5251 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
// Exercises the quad-register 3-element load vld3q_p8; the CHECK lines
// above pin the IR clang emits (sret alloca plus a 3-result struct call).
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
  return vld3q_p8(a);
}
5255
5256 // CHECK-LABEL: @test_vld3q_p16(
5257 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
5258 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
5259 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5260 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// Exercises the quad-register 3-element load vld3q_p16; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
  return vld3q_p16(a);
}
5264
5265 // CHECK-LABEL: @test_vld3_u8(
5266 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
5267 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
5268 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// Exercises the double-register 3-element load vld3_u8; the CHECK lines
// above pin the IR clang emits (sret alloca plus a 3-result struct call).
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
  return vld3_u8(a);
}
5272
5273 // CHECK-LABEL: @test_vld3_u16(
5274 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
5275 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
5276 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5277 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// Exercises the double-register 3-element load vld3_u16; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
  return vld3_u16(a);
}
5281
5282 // CHECK-LABEL: @test_vld3_u32(
5283 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
5284 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
5285 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5286 // CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
// Exercises the double-register 3-element load vld3_u32; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
  return vld3_u32(a);
}
5290
5291 // CHECK-LABEL: @test_vld3_u64(
5292 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
5293 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
5294 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5295 // CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
// Exercises the double-register 3-element load vld3_u64 (1x64-bit lanes);
// the CHECK lines above pin the IR clang emits for this call.
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
  return vld3_u64(a);
}
5299
5300 // CHECK-LABEL: @test_vld3_s8(
5301 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
5302 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
5303 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// Exercises the double-register 3-element load vld3_s8; the CHECK lines
// above pin the IR clang emits (sret alloca plus a 3-result struct call).
int8x8x3_t test_vld3_s8(int8_t const * a) {
  return vld3_s8(a);
}
5307
5308 // CHECK-LABEL: @test_vld3_s16(
5309 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
5310 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
5311 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5312 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// Exercises the double-register 3-element load vld3_s16; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
int16x4x3_t test_vld3_s16(int16_t const * a) {
  return vld3_s16(a);
}
5316
5317 // CHECK-LABEL: @test_vld3_s32(
5318 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
5319 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
5320 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5321 // CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
// Exercises the double-register 3-element load vld3_s32; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
int32x2x3_t test_vld3_s32(int32_t const * a) {
  return vld3_s32(a);
}
5325
5326 // CHECK-LABEL: @test_vld3_s64(
5327 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
5328 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
5329 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
5330 // CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
// Exercises the double-register 3-element load vld3_s64 (1x64-bit lanes);
// the CHECK lines above pin the IR clang emits for this call.
int64x1x3_t test_vld3_s64(int64_t const * a) {
  return vld3_s64(a);
}
5334
5335 // CHECK-LABEL: @test_vld3_f16(
5336 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
5337 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
5338 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5339 // CHECK: [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
// Exercises the double-register 3-element load vld3_f16; the CHECK lines
// above pin the IR clang emits (half* bitcast plus a 3-result struct call).
float16x4x3_t test_vld3_f16(float16_t const * a) {
  return vld3_f16(a);
}
5343
5344 // CHECK-LABEL: @test_vld3_f32(
5345 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
5346 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
5347 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
5348 // CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
// Exercises the double-register 3-element load vld3_f32; the CHECK lines
// above pin the IR clang emits (float* bitcast plus a 3-result struct call).
float32x2x3_t test_vld3_f32(float32_t const * a) {
  return vld3_f32(a);
}
5352
5353 // CHECK-LABEL: @test_vld3_p8(
5354 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
5355 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
5356 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// Exercises the double-register 3-element load vld3_p8; the CHECK lines
// above pin the IR clang emits (sret alloca plus a 3-result struct call).
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
  return vld3_p8(a);
}
5360
5361 // CHECK-LABEL: @test_vld3_p16(
5362 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
5363 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
5364 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5365 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// Exercises the double-register 3-element load vld3_p16; the CHECK lines
// above pin the IR clang emits (pointer bitcast plus a 3-result struct call).
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
  return vld3_p16(a);
}
5369
5370 // CHECK-LABEL: @test_vld3q_lane_u16(
5371 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
5372 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
5373 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
5374 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
5375 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5376 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5377 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
5378 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
5379 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5380 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
5381 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5382 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
5383 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5384 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5385 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5386 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
5387 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5388 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5389 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5390 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
5391 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5392 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5393 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5394 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5395 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5396 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5397 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// Exercises vld3q_lane_u16 with the maximum lane index (7) for an 8x16-bit
// vector; the CHECK lines above pin the struct-coercion and call IR.
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}
5401
5402 // CHECK-LABEL: @test_vld3q_lane_u32(
5403 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
5404 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
5405 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
5406 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
5407 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
5408 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5409 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
5410 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
5411 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5412 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
5413 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5414 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
5415 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
5416 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
5417 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5418 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
5419 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
5420 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
5421 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5422 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
5423 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
5424 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
5425 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
5426 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5427 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5428 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5429 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// Exercises vld3q_lane_u32 with the maximum lane index (3) for a 4x32-bit
// vector; the CHECK lines above pin the struct-coercion and call IR.
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
  return vld3q_lane_u32(a, b, 3);
}
5433
5434 // CHECK-LABEL: @test_vld3q_lane_s16(
5435 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
5436 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
5437 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
5438 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
5439 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5440 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5441 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
5442 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
5443 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5444 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
5445 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5446 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5447 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5448 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5449 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5450 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5451 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5452 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5453 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5454 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5455 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5456 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5457 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5458 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5459 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5460 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5461 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// Exercises vld3q_lane_s16 with the maximum lane index (7) for an 8x16-bit
// vector; the CHECK lines above pin the struct-coercion and call IR.
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
  return vld3q_lane_s16(a, b, 7);
}
5465
5466 // CHECK-LABEL: @test_vld3q_lane_s32(
5467 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
5468 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
5469 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
5470 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
5471 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
5472 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5473 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
5474 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
5475 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5476 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
5477 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5478 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5479 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
5480 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
5481 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5482 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5483 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
5484 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
5485 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5486 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5487 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
5488 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
5489 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
5490 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5491 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5492 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5493 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
// Exercises vld3q_lane_s32 with the maximum lane index (3) for a 4x32-bit
// vector; the CHECK lines above pin the struct-coercion and call IR.
int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
  return vld3q_lane_s32(a, b, 3);
}
5497
5498 // CHECK-LABEL: @test_vld3q_lane_f16(
5499 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
5500 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
5501 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
5502 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
5503 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
5504 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5505 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
5506 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
5507 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5508 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
5509 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5510 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5511 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
5512 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
5513 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
5514 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5515 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
5516 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
5517 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
5518 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5519 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
5520 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
5521 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
5522 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
5523 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
5524 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
5525 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
// Exercises vld3q_lane_f16 with the maximum lane index (7) for an 8x half
// vector; the CHECK lines above pin the struct-coercion and call IR.
float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
  return vld3q_lane_f16(a, b, 7);
}
5529
5530 // CHECK-LABEL: @test_vld3q_lane_f32(
5531 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
5532 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
5533 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
5534 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
5535 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
5536 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5537 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
5538 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
5539 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5540 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
5541 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5542 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5543 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
5544 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
5545 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
5546 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5547 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
5548 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
5549 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
5550 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5551 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
5552 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
5553 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
5554 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
5555 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
5556 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
5557 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
// Codegen test: vld3q_lane_f32 with lane 3, the highest lane of a <4 x float> vector.
float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
  return vld3q_lane_f32(a, b, 3);
}
5561
5562 // CHECK-LABEL: @test_vld3q_lane_p16(
5563 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
5564 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
5565 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
5566 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
5567 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5568 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5569 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
5570 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
5571 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5572 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
5573 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5574 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5575 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5576 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5577 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5578 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5579 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5580 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5581 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5582 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5583 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5584 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5585 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5586 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5587 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5588 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5589 // CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
// Codegen test: vld3q_lane_p16 with lane 7, the highest lane of an <8 x i16> vector.
poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
  return vld3q_lane_p16(a, b, 7);
}
5593
5594 // CHECK-LABEL: @test_vld3_lane_u8(
5595 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
5596 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
5597 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
5598 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
5599 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
5600 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5601 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
5602 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
5603 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5604 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
5605 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
5606 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
5607 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5608 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
5609 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5610 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5611 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
5612 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
5613 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
5614 // CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// Codegen test: vld3_lane_u8 with lane 7, the highest lane of an <8 x i8> vector.
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
  return vld3_lane_u8(a, b, 7);
}
5618
5619 // CHECK-LABEL: @test_vld3_lane_u16(
5620 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
5621 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
5622 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
5623 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
5624 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
5625 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5626 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
5627 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
5628 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5629 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
5630 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5631 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
5632 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
5633 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5634 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5635 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
5636 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5637 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5638 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5639 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
5640 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
5641 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
5642 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
5643 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5644 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5645 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
5646 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// Codegen test: vld3_lane_u16 with lane 3, the highest lane of a <4 x i16> vector.
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
  return vld3_lane_u16(a, b, 3);
}
5650
5651 // CHECK-LABEL: @test_vld3_lane_u32(
5652 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
5653 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
5654 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
5655 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
5656 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
5657 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5658 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
5659 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
5660 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5661 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
5662 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5663 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
5664 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
5665 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5666 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5667 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
5668 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5669 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5670 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5671 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
5672 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
5673 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
5674 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
5675 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5676 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5677 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
5678 // CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
// Codegen test: vld3_lane_u32 with lane 1, the highest lane of a <2 x i32> vector.
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
  return vld3_lane_u32(a, b, 1);
}
5682
5683 // CHECK-LABEL: @test_vld3_lane_s8(
5684 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
5685 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
5686 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
5687 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
5688 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
5689 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5690 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
5691 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
5692 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5693 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
5694 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
5695 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
5696 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5697 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
5698 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5699 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5700 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
5701 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
5702 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
5703 // CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// Codegen test: vld3_lane_s8 with lane 7, the highest lane of an <8 x i8> vector.
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
  return vld3_lane_s8(a, b, 7);
}
5707
5708 // CHECK-LABEL: @test_vld3_lane_s16(
5709 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
5710 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
5711 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
5712 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
5713 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
5714 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5715 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
5716 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
5717 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5718 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
5719 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5720 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
5721 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
5722 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5723 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5724 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
5725 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5726 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5727 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5728 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
5729 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
5730 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
5731 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
5732 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5733 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5734 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
5735 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// Codegen test: vld3_lane_s16 with lane 3, the highest lane of a <4 x i16> vector.
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
  return vld3_lane_s16(a, b, 3);
}
5739
5740 // CHECK-LABEL: @test_vld3_lane_s32(
5741 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
5742 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
5743 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
5744 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
5745 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
5746 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5747 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
5748 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
5749 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5750 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
5751 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
5752 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
5753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
5754 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
5755 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
5756 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
5757 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
5758 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
5759 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
5760 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
5761 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
5762 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
5763 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
5764 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
5765 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
5766 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
5767 // CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
// Codegen test: vld3_lane_s32 with lane 1, the highest lane of a <2 x i32> vector.
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}
5771
5772 // CHECK-LABEL: @test_vld3_lane_f16(
5773 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
5774 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
5775 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
5776 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
5777 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
5778 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5779 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
5780 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
5781 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5782 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
5783 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
5784 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
5785 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
5786 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
5787 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
5788 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
5789 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
5790 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
5791 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
5792 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
5793 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
5794 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
5795 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
5796 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
5797 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
5798 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
5799 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
// Codegen test: vld3_lane_f16 with lane 3, the highest lane of a <4 x half> vector.
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}
5803
5804 // CHECK-LABEL: @test_vld3_lane_f32(
5805 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
5806 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
5807 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
5808 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
5809 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
5810 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5811 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
5812 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
5813 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5814 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
5815 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
5816 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
5817 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
5818 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
5819 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
5820 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
5821 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
5822 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
5823 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
5824 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
5825 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
5826 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
5827 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
5828 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
5829 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
5830 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
5831 // CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
// Codegen test: vld3_lane_f32 with lane 1, the highest lane of a <2 x float> vector.
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}
5835
5836 // CHECK-LABEL: @test_vld3_lane_p8(
5837 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
5838 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
5839 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
5840 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
5841 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
5842 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5843 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
5844 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
5845 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5846 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
5847 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
5848 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
5849 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
5850 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
5851 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
5852 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
5853 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
5854 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
5855 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
5856 // CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
// Codegen test: vld3_lane_p8 with lane 7, the highest lane of an <8 x i8> vector.
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
  return vld3_lane_p8(a, b, 7);
}
5860
5861 // CHECK-LABEL: @test_vld3_lane_p16(
5862 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
5863 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
5864 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
5865 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
5866 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
5867 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
5868 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
5869 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
5870 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
5871 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
5872 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
5873 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
5874 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
5875 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
5876 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
5877 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
5878 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
5879 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
5880 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
5881 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
5882 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
5883 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
5884 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
5885 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
5886 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
5887 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
5888 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
// Codegen test: vld3_lane_p16 with lane 3, the highest lane of a <4 x i16> vector.
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
  return vld3_lane_p16(a, b, 3);
}
5892
5893 // CHECK-LABEL: @test_vld4q_u8(
5894 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
5895 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
5896 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
// Codegen test: vld4q_u8 quad-register load returning four <16 x i8> vectors.
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  return vld4q_u8(a);
}
5900
5901 // CHECK-LABEL: @test_vld4q_u16(
5902 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
5903 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
5904 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5905 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
// Codegen test: vld4q_u16 quad-register load returning four <8 x i16> vectors.
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  return vld4q_u16(a);
}
5909
5910 // CHECK-LABEL: @test_vld4q_u32(
5911 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
5912 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
5913 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5914 // CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
// Codegen test: vld4q_u32 quad-register load returning four <4 x i32> vectors.
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  return vld4q_u32(a);
}
5918
5919 // CHECK-LABEL: @test_vld4q_s8(
5920 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
5921 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
5922 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
// Codegen test: vld4q_s8 quad-register load returning four <16 x i8> vectors.
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  return vld4q_s8(a);
}
5926
5927 // CHECK-LABEL: @test_vld4q_s16(
5928 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
5929 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
5930 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5931 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
// Codegen test: vld4q_s16 quad-register load returning four <8 x i16> vectors.
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  return vld4q_s16(a);
}
5935
5936 // CHECK-LABEL: @test_vld4q_s32(
5937 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
5938 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
5939 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
5940 // CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
// Codegen test: vld4q_s32 quad-register load returning four <4 x i32> vectors.
int32x4x4_t test_vld4q_s32(int32_t const * a) {
  return vld4q_s32(a);
}
5944
5945 // CHECK-LABEL: @test_vld4q_f16(
5946 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
5947 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
5948 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
5949 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
// Codegen test: vld4q_f16 quad-register load returning four <8 x half> vectors.
float16x8x4_t test_vld4q_f16(float16_t const * a) {
  return vld4q_f16(a);
}
5953
5954 // CHECK-LABEL: @test_vld4q_f32(
5955 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
5956 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
5957 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
5958 // CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
// Codegen test: vld4q_f32 quad-register load returning four <4 x float> vectors.
float32x4x4_t test_vld4q_f32(float32_t const * a) {
  return vld4q_f32(a);
}
5962
5963 // CHECK-LABEL: @test_vld4q_p8(
5964 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
5965 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
5966 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
// Codegen test: vld4q_p8 quad-register load returning four <16 x i8> vectors.
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
  return vld4q_p8(a);
}
5970
5971 // CHECK-LABEL: @test_vld4q_p16(
5972 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
5973 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
5974 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5975 // CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
// Codegen test: vld4q_p16 quad-register load returning four <8 x i16> vectors.
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
  return vld4q_p16(a);
}
5979
5980 // CHECK-LABEL: @test_vld4_u8(
5981 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
5982 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
5983 // CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Sanity test for the NEON vld4_u8 intrinsic (4-way interleaved load of
// <8 x i8>); the FileCheck annotations above pin the IR Clang must emit.
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
  return vld4_u8(a);
}
5987
5988 // CHECK-LABEL: @test_vld4_u16(
5989 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
5990 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
5991 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
5992 // CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Sanity test for the NEON vld4_u16 intrinsic (4-way interleaved load of
// <4 x i16>); the FileCheck annotations above pin the IR Clang must emit.
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
  return vld4_u16(a);
}
5996
5997 // CHECK-LABEL: @test_vld4_u32(
5998 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
5999 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
6000 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6001 // CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
// Sanity test for the NEON vld4_u32 intrinsic (4-way interleaved load of
// <2 x i32>); the FileCheck annotations above pin the IR Clang must emit.
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
  return vld4_u32(a);
}
6005
6006 // CHECK-LABEL: @test_vld4_u64(
6007 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
6008 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
6009 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6010 // CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
// Sanity test for the NEON vld4_u64 intrinsic (4-way load of <1 x i64>
// elements); the FileCheck annotations above pin the IR Clang must emit.
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
  return vld4_u64(a);
}
6014
6015 // CHECK-LABEL: @test_vld4_s8(
6016 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
6017 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
6018 // CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Sanity test for the NEON vld4_s8 intrinsic (4-way interleaved load of
// <8 x i8>); the FileCheck annotations above pin the IR Clang must emit.
int8x8x4_t test_vld4_s8(int8_t const * a) {
  return vld4_s8(a);
}
6022
6023 // CHECK-LABEL: @test_vld4_s16(
6024 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
6025 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
6026 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6027 // CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Sanity test for the NEON vld4_s16 intrinsic (4-way interleaved load of
// <4 x i16>); the FileCheck annotations above pin the IR Clang must emit.
int16x4x4_t test_vld4_s16(int16_t const * a) {
  return vld4_s16(a);
}
6031
6032 // CHECK-LABEL: @test_vld4_s32(
6033 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
6034 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
6035 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
6036 // CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
// Sanity test for the NEON vld4_s32 intrinsic (4-way interleaved load of
// <2 x i32>); the FileCheck annotations above pin the IR Clang must emit.
int32x2x4_t test_vld4_s32(int32_t const * a) {
  return vld4_s32(a);
}
6040
6041 // CHECK-LABEL: @test_vld4_s64(
6042 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
6043 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
6044 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
6045 // CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
// Sanity test for the NEON vld4_s64 intrinsic (4-way load of <1 x i64>
// elements); the FileCheck annotations above pin the IR Clang must emit.
int64x1x4_t test_vld4_s64(int64_t const * a) {
  return vld4_s64(a);
}
6049
6050 // CHECK-LABEL: @test_vld4_f16(
6051 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
6052 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
6053 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
6054 // CHECK: [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
// Sanity test for the NEON vld4_f16 intrinsic (4-way interleaved load of
// <4 x half>); the FileCheck annotations above pin the IR Clang must emit.
float16x4x4_t test_vld4_f16(float16_t const * a) {
  return vld4_f16(a);
}
6058
6059 // CHECK-LABEL: @test_vld4_f32(
6060 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
6061 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
6062 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
6063 // CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
// Sanity test for the NEON vld4_f32 intrinsic (4-way interleaved load of
// <2 x float>); the FileCheck annotations above pin the IR Clang must emit.
float32x2x4_t test_vld4_f32(float32_t const * a) {
  return vld4_f32(a);
}
6067
6068 // CHECK-LABEL: @test_vld4_p8(
6069 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
6070 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
6071 // CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Sanity test for the NEON vld4_p8 intrinsic (4-way interleaved load of
// <8 x i8> polynomial data); the FileCheck annotations above pin the IR.
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
  return vld4_p8(a);
}
6075
6076 // CHECK-LABEL: @test_vld4_p16(
6077 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
6078 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
6079 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
6080 // CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Sanity test for the NEON vld4_p16 intrinsic (4-way interleaved load of
// <4 x i16> polynomial data); the FileCheck annotations above pin the IR.
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
  return vld4_p16(a);
}
6084
6085 // CHECK-LABEL: @test_vld4q_lane_u16(
6086 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
6087 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
6088 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
6089 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
6090 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6091 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6092 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
6093 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
6094 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6095 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
6096 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6097 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6098 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6099 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6100 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6101 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6102 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6103 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6104 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6105 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6106 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6107 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6108 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6109 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
6110 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6111 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6112 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6113 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6114 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6115 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6116 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6117 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
// Sanity test for vld4q_lane_u16 with lane index 7 (highest lane of an
// 8 x u16 vector); the FileCheck annotations above pin the IR Clang must emit.
uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
  return vld4q_lane_u16(a, b, 7);
}
6121
6122 // CHECK-LABEL: @test_vld4q_lane_u32(
6123 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
6124 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
6125 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
6126 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
6127 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
6128 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6129 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
6130 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
6131 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6132 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
6133 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6134 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6135 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
6136 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
6137 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
6138 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6139 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
6140 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
6141 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
6142 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6143 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
6144 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
6145 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
6146 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
6147 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
6148 // CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
6149 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
6150 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6151 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6152 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6153 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
6154 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
// Sanity test for vld4q_lane_u32 with lane index 3 (highest lane of a
// 4 x u32 vector); the FileCheck annotations above pin the IR Clang must emit.
uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
  return vld4q_lane_u32(a, b, 3);
}
6158
6159 // CHECK-LABEL: @test_vld4q_lane_s16(
6160 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
6161 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
6162 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
6163 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
6164 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6165 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6166 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
6167 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
6168 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6169 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
6170 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6171 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6172 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6173 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6174 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6175 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6176 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6177 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6178 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6179 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6180 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6181 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6182 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6183 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
6184 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6185 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6186 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6187 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6188 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6189 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6190 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6191 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
// Sanity test for vld4q_lane_s16 with lane index 7 (highest lane of an
// 8 x s16 vector); the FileCheck annotations above pin the IR Clang must emit.
int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
  return vld4q_lane_s16(a, b, 7);
}
6195
6196 // CHECK-LABEL: @test_vld4q_lane_s32(
6197 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
6198 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
6199 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
6200 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
6201 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
6202 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6203 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
6204 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
6205 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6206 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
6207 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6208 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6209 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
6210 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
6211 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
6212 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6213 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
6214 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
6215 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
6216 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6217 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
6218 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
6219 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
6220 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6221 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
6222 // CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
6223 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
6224 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6225 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6226 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6227 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
6228 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
// Sanity test for vld4q_lane_s32 with lane index 3 (highest lane of a
// 4 x s32 vector); the FileCheck annotations above pin the IR Clang must emit.
int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
  return vld4q_lane_s32(a, b, 3);
}
6232
6233 // CHECK-LABEL: @test_vld4q_lane_f16(
6234 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
6235 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
6236 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
6237 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
6238 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
6239 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6240 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
6241 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
6242 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6243 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
6244 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
6245 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6246 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
6247 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
6248 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
6249 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6250 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
6251 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
6252 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
6253 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6254 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
6255 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
6256 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
6257 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6258 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
6259 // CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
6260 // CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
6261 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
6262 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
6263 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
6264 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x half>
6265 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
// Sanity test for vld4q_lane_f16 with lane index 7 (highest lane of an
// 8 x f16 vector); the FileCheck annotations above pin the IR Clang must emit.
float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
  return vld4q_lane_f16(a, b, 7);
}
6269
6270 // CHECK-LABEL: @test_vld4q_lane_f32(
6271 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
6272 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
6273 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
6274 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
6275 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
6276 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6277 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
6278 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
6279 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6280 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
6281 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
6282 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6283 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
6284 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
6285 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
6286 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6287 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
6288 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
6289 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
6290 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6291 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
6292 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
6293 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
6294 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6295 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
6296 // CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
6297 // CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
6298 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
6299 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
6300 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
6301 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
6302 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
// Sanity test for vld4q_lane_f32 with lane index 3 (highest lane of a
// 4 x f32 vector); the FileCheck annotations above pin the IR Clang must emit.
float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
  return vld4q_lane_f32(a, b, 3);
}
6306
6307 // CHECK-LABEL: @test_vld4q_lane_p16(
6308 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
6309 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
6310 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
6311 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
6312 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6313 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6314 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
6315 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
6316 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6317 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
6318 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6319 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6320 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6321 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6322 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6323 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6324 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6325 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6326 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6327 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6328 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6329 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6330 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6331 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6332 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6333 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6334 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6335 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6336 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6337 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6338 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6339 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
// Sanity test for vld4q_lane_p16 with lane index 7 (highest lane of an
// 8 x p16 vector); the FileCheck annotations above pin the IR Clang must emit.
poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
  return vld4q_lane_p16(a, b, 7);
}
6343
6344 // CHECK-LABEL: @test_vld4_lane_u8(
6345 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
6346 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
6347 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
6348 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
6349 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
6350 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6351 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
6352 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
6353 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6354 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
6355 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6356 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
6357 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
6358 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6359 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
6360 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
6361 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6362 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
6363 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
6364 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6365 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
6366 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
6367 // CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Sanity test for vld4_lane_u8 with lane index 7 (highest lane of an
// 8 x u8 vector); the FileCheck annotations above pin the IR Clang must emit.
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
  return vld4_lane_u8(a, b, 7);
}
6371
6372 // CHECK-LABEL: @test_vld4_lane_u16(
6373 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
6374 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
6375 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
6376 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
6377 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
6378 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6379 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
6380 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
6381 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6382 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
6383 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6384 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6385 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
6386 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
6387 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
6388 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6389 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
6390 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
6391 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
6392 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6393 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
6394 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
6395 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
6396 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
6397 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
6398 // CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
6399 // CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
6400 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
6401 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
6402 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
6403 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
6404 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Sanity test for vld4_lane_u16 with lane index 3 (highest lane of a
// 4 x u16 vector); the FileCheck annotations above pin the IR Clang must emit.
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}
6408
6409 // CHECK-LABEL: @test_vld4_lane_u32(
6410 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
6411 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
6412 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
6413 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
6414 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
6415 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6416 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
6417 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
6418 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6419 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
6420 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6421 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6422 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
6423 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
6424 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
6425 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6426 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
6427 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
6428 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
6429 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6430 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
6431 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
6432 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
6433 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
6434 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
6435 // CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
6436 // CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
6437 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
6438 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
6439 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
6440 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
6441 // CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
// Lane-1 (max lane for 2 x u32) structured 4-element load; IR shape is pinned by the CHECK lines above.
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}
6445
6446 // CHECK-LABEL: @test_vld4_lane_s8(
6447 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
6448 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
6449 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
6450 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
6451 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
6452 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6453 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
6454 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
6455 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6456 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
6457 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6458 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
6459 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
6460 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6461 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
6462 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
6463 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6464 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
6465 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
6466 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
6467 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
6468 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
6469 // CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Lane-7 (max lane for 8 x s8) structured 4-element load; no per-field bitcasts needed for i8 vectors (see CHECKs above).
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}
6473
6474 // CHECK-LABEL: @test_vld4_lane_s16(
6475 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
6476 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
6477 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
6478 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
6479 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
6480 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6481 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
6482 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
6483 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6484 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
6485 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6486 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6487 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
6488 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
6489 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
6490 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6491 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
6492 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
6493 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
6494 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6495 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
6496 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
6497 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
6498 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
6499 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
6500 // CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
6501 // CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
6502 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
6503 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
6504 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
6505 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
6506 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Lane-3 (max lane for 4 x s16) structured 4-element load; IR shape is pinned by the CHECK lines above.
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}
6510
6511 // CHECK-LABEL: @test_vld4_lane_s32(
6512 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
6513 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
6514 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
6515 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
6516 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
6517 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6518 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
6519 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
6520 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6521 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
6522 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
6523 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6524 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
6525 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
6526 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
6527 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6528 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
6529 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
6530 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
6531 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6532 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
6533 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
6534 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
6535 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
6536 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
6537 // CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
6538 // CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
6539 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
6540 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
6541 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
6542 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
6543 // CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
// Lane-1 (max lane for 2 x s32) structured 4-element load; IR shape is pinned by the CHECK lines above.
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}
6547
6548 // CHECK-LABEL: @test_vld4_lane_f16(
6549 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
6550 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
6551 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
6552 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
6553 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
6554 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6555 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
6556 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
6557 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6558 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
6559 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
6560 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6561 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
6562 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
6563 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
6564 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6565 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
6566 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
6567 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
6568 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6569 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
6570 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
6571 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
6572 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
6573 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
6574 // CHECK: [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
6575 // CHECK: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
6576 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
6577 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
6578 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
6579 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
6580 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
// Lane-3 (max lane for 4 x f16) structured 4-element load; exercises <4 x half> bitcasts (see CHECKs above).
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}
6584
6585 // CHECK-LABEL: @test_vld4_lane_f32(
6586 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
6587 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
6588 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
6589 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
6590 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
6591 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6592 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
6593 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
6594 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6595 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
6596 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
6597 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6598 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
6599 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
6600 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
6601 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6602 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
6603 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
6604 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
6605 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6606 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
6607 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
6608 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
6609 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
6610 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
6611 // CHECK: [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
6612 // CHECK: [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
6613 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
6614 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
6615 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
6616 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
6617 // CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
// Lane-1 (max lane for 2 x f32) structured 4-element load; IR shape is pinned by the CHECK lines above.
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}
6621
6622 // CHECK-LABEL: @test_vld4_lane_p8(
6623 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
6624 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
6625 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
6626 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
6627 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
6628 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6629 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
6630 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
6631 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6632 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
6633 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6634 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
6635 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
6636 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6637 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
6638 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
6639 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6640 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
6641 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
6642 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
6643 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
6644 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
6645 // CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
// Lane-7 (max lane for 8 x p8) structured 4-element load; same i8-vector lowering as the s8 variant (see CHECKs above).
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}
6649
6650 // CHECK-LABEL: @test_vld4_lane_p16(
6651 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
6652 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
6653 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
6654 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
6655 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
6656 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6657 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
6658 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
6659 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6660 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
6661 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
6662 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6663 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
6664 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
6665 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
6666 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6667 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
6668 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
6669 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
6670 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6671 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
6672 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
6673 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
6674 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
6675 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
6676 // CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
6677 // CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
6678 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
6679 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
6680 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
6681 // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
6682 // CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
// Lane-3 (max lane for 4 x p16) structured 4-element load; same i16-vector lowering as the s16/u16 variants (see CHECKs above).
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}
6686
6687 // CHECK-LABEL: @test_vmax_s8(
6688 // CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
6689 // CHECK: ret <8 x i8> [[VMAX_V_I]]
// Pins lowering to @llvm.arm.neon.vmaxs.v8i8 (signed max, no bitcasts for i8 vectors).
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}
6693
6694 // CHECK-LABEL: @test_vmax_s16(
6695 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6696 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6697 // CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
6698 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
6699 // CHECK: ret <4 x i16> [[VMAX_V2_I]]
// Pins lowering to @llvm.arm.neon.vmaxs.v4i16 (signed max).
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}
6703
6704 // CHECK-LABEL: @test_vmax_s32(
6705 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6706 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6707 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
6708 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
6709 // CHECK: ret <2 x i32> [[VMAX_V2_I]]
// Pins lowering to @llvm.arm.neon.vmaxs.v2i32 (signed max).
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}
6713
6714 // CHECK-LABEL: @test_vmax_u8(
6715 // CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
6716 // CHECK: ret <8 x i8> [[VMAX_V_I]]
// Pins lowering to @llvm.arm.neon.vmaxu.v8i8 (unsigned max).
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}
6720
6721 // CHECK-LABEL: @test_vmax_u16(
6722 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6723 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6724 // CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
6725 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
6726 // CHECK: ret <4 x i16> [[VMAX_V2_I]]
// Pins lowering to @llvm.arm.neon.vmaxu.v4i16 (unsigned max).
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}
6730
6731 // CHECK-LABEL: @test_vmax_u32(
6732 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6733 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6734 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
6735 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
6736 // CHECK: ret <2 x i32> [[VMAX_V2_I]]
// Pins lowering to @llvm.arm.neon.vmaxu.v2i32 (unsigned max).
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}
6740
6741 // CHECK-LABEL: @test_vmax_f32(
6742 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6743 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
6744 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b)
6745 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
6746 // CHECK: ret <2 x float> [[VMAX_V2_I]]
// Pins lowering to @llvm.arm.neon.vmaxs.v2f32 (float max shares the "vmaxs" intrinsic name).
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
6750
6751 // CHECK-LABEL: @test_vmaxq_s8(
6752 // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b)
6753 // CHECK: ret <16 x i8> [[VMAXQ_V_I]]
// Quad-register variant: pins lowering to @llvm.arm.neon.vmaxs.v16i8.
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}
6757
6758 // CHECK-LABEL: @test_vmaxq_s16(
6759 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6760 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6761 // CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b)
6762 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
6763 // CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
// Quad-register variant: pins lowering to @llvm.arm.neon.vmaxs.v8i16.
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}
6767
6768 // CHECK-LABEL: @test_vmaxq_s32(
6769 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6770 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6771 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b)
6772 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
6773 // CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
// Quad-register variant: pins lowering to @llvm.arm.neon.vmaxs.v4i32.
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}
6777
6778 // CHECK-LABEL: @test_vmaxq_u8(
6779 // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b)
6780 // CHECK: ret <16 x i8> [[VMAXQ_V_I]]
// Quad-register variant: pins lowering to @llvm.arm.neon.vmaxu.v16i8.
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}
6784
6785 // CHECK-LABEL: @test_vmaxq_u16(
6786 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6787 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6788 // CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b)
6789 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
6790 // CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
// Quad-register variant: pins lowering to @llvm.arm.neon.vmaxu.v8i16.
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}
6794
6795 // CHECK-LABEL: @test_vmaxq_u32(
6796 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6797 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6798 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b)
6799 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
6800 // CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
// Quad-register variant: pins lowering to @llvm.arm.neon.vmaxu.v4i32.
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}
6804
6805 // CHECK-LABEL: @test_vmaxq_f32(
6806 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6807 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
6808 // CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b)
6809 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
6810 // CHECK: ret <4 x float> [[VMAXQ_V2_I]]
// Quad-register variant: pins lowering to @llvm.arm.neon.vmaxs.v4f32.
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}
6814
6815 // CHECK-LABEL: @test_vmin_s8(
6816 // CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b)
6817 // CHECK: ret <8 x i8> [[VMIN_V_I]]
// Pins lowering to @llvm.arm.neon.vmins.v8i8 (signed min).
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}
6821
6822 // CHECK-LABEL: @test_vmin_s16(
6823 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6824 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6825 // CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b)
6826 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
6827 // CHECK: ret <4 x i16> [[VMIN_V2_I]]
// Pins lowering to @llvm.arm.neon.vmins.v4i16 (signed min).
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}
6831
6832 // CHECK-LABEL: @test_vmin_s32(
6833 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6834 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6835 // CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b)
6836 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
6837 // CHECK: ret <2 x i32> [[VMIN_V2_I]]
// Pins lowering to @llvm.arm.neon.vmins.v2i32 (signed min).
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}
6841
6842 // CHECK-LABEL: @test_vmin_u8(
6843 // CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b)
6844 // CHECK: ret <8 x i8> [[VMIN_V_I]]
// Pins lowering to @llvm.arm.neon.vminu.v8i8 (unsigned min).
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}
6848
6849 // CHECK-LABEL: @test_vmin_u16(
6850 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6851 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6852 // CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b)
6853 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
6854 // CHECK: ret <4 x i16> [[VMIN_V2_I]]
// Pins lowering to @llvm.arm.neon.vminu.v4i16 (unsigned min).
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}
6858
6859 // CHECK-LABEL: @test_vmin_u32(
6860 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6861 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6862 // CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b)
6863 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
6864 // CHECK: ret <2 x i32> [[VMIN_V2_I]]
// Pins lowering to @llvm.arm.neon.vminu.v2i32 (unsigned min).
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}
6868
6869 // CHECK-LABEL: @test_vmin_f32(
6870 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6871 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
6872 // CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b)
6873 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
6874 // CHECK: ret <2 x float> [[VMIN_V2_I]]
// vmin_f32: CHECK lines above pin lowering to @llvm.arm.neon.vmins.v2f32.
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}
6878
6879 // CHECK-LABEL: @test_vminq_s8(
6880 // CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b)
6881 // CHECK: ret <16 x i8> [[VMINQ_V_I]]
// vminq_s8 (128-bit): CHECK lines above pin lowering to @llvm.arm.neon.vmins.v16i8.
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}
6885
6886 // CHECK-LABEL: @test_vminq_s16(
6887 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6888 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6889 // CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b)
6890 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
6891 // CHECK: ret <8 x i16> [[VMINQ_V2_I]]
// vminq_s16 (128-bit): CHECK lines above pin lowering to @llvm.arm.neon.vmins.v8i16.
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}
6895
6896 // CHECK-LABEL: @test_vminq_s32(
6897 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6898 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6899 // CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b)
6900 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
6901 // CHECK: ret <4 x i32> [[VMINQ_V2_I]]
// vminq_s32 (128-bit): CHECK lines above pin lowering to @llvm.arm.neon.vmins.v4i32.
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}
6905
6906 // CHECK-LABEL: @test_vminq_u8(
6907 // CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b)
6908 // CHECK: ret <16 x i8> [[VMINQ_V_I]]
// vminq_u8 (128-bit): CHECK lines above pin lowering to @llvm.arm.neon.vminu.v16i8.
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}
6912
6913 // CHECK-LABEL: @test_vminq_u16(
6914 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6915 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6916 // CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b)
6917 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
6918 // CHECK: ret <8 x i16> [[VMINQ_V2_I]]
// vminq_u16 (128-bit): CHECK lines above pin lowering to @llvm.arm.neon.vminu.v8i16.
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}
6922
6923 // CHECK-LABEL: @test_vminq_u32(
6924 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6925 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6926 // CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b)
6927 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
6928 // CHECK: ret <4 x i32> [[VMINQ_V2_I]]
// vminq_u32 (128-bit): CHECK lines above pin lowering to @llvm.arm.neon.vminu.v4i32.
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}
6932
6933 // CHECK-LABEL: @test_vminq_f32(
6934 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6935 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
6936 // CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b)
6937 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
6938 // CHECK: ret <4 x float> [[VMINQ_V2_I]]
// vminq_f32 (128-bit): CHECK lines above pin lowering to @llvm.arm.neon.vmins.v4f32.
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}
6942
6943 // CHECK-LABEL: @test_vmla_s8(
6944 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
6945 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
6946 // CHECK: ret <8 x i8> [[ADD_I]]
// vmla_s8 (multiply-accumulate): CHECK lines above pin lowering to plain IR mul + add.
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmla_s8(a, b, c);
}
6950
6951 // CHECK-LABEL: @test_vmla_s16(
6952 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
6953 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
6954 // CHECK: ret <4 x i16> [[ADD_I]]
// vmla_s16: CHECK lines above pin lowering to mul + add on <4 x i16>.
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_s16(a, b, c);
}
6958
6959 // CHECK-LABEL: @test_vmla_s32(
6960 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
6961 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
6962 // CHECK: ret <2 x i32> [[ADD_I]]
// vmla_s32: CHECK lines above pin lowering to mul + add on <2 x i32>.
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_s32(a, b, c);
}
6966
6967 // CHECK-LABEL: @test_vmla_f32(
6968 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
6969 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
6970 // CHECK: ret <2 x float> [[ADD_I]]
// vmla_f32: CHECK lines above pin lowering to separate fmul + fadd (not fused).
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_f32(a, b, c);
}
6974
6975 // CHECK-LABEL: @test_vmla_u8(
6976 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
6977 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
6978 // CHECK: ret <8 x i8> [[ADD_I]]
// vmla_u8: CHECK lines above pin lowering to mul + add on <8 x i8>.
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmla_u8(a, b, c);
}
6982
6983 // CHECK-LABEL: @test_vmla_u16(
6984 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
6985 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
6986 // CHECK: ret <4 x i16> [[ADD_I]]
// vmla_u16: CHECK lines above pin lowering to mul + add on <4 x i16>.
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_u16(a, b, c);
}
6990
6991 // CHECK-LABEL: @test_vmla_u32(
6992 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
6993 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
6994 // CHECK: ret <2 x i32> [[ADD_I]]
// vmla_u32: CHECK lines above pin lowering to mul + add on <2 x i32>.
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_u32(a, b, c);
}
6998
6999 // CHECK-LABEL: @test_vmlaq_s8(
7000 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7001 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
7002 // CHECK: ret <16 x i8> [[ADD_I]]
// vmlaq_s8 (128-bit): CHECK lines above pin lowering to mul + add on <16 x i8>.
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlaq_s8(a, b, c);
}
7006
7007 // CHECK-LABEL: @test_vmlaq_s16(
7008 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7009 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7010 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlaq_s16 (128-bit): CHECK lines above pin lowering to mul + add on <8 x i16>.
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlaq_s16(a, b, c);
}
7014
7015 // CHECK-LABEL: @test_vmlaq_s32(
7016 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7017 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7018 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlaq_s32 (128-bit): CHECK lines above pin lowering to mul + add on <4 x i32>.
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlaq_s32(a, b, c);
}
7022
7023 // CHECK-LABEL: @test_vmlaq_f32(
7024 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
7025 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
7026 // CHECK: ret <4 x float> [[ADD_I]]
// vmlaq_f32 (128-bit): CHECK lines above pin lowering to separate fmul + fadd.
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlaq_f32(a, b, c);
}
7030
7031 // CHECK-LABEL: @test_vmlaq_u8(
7032 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7033 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
7034 // CHECK: ret <16 x i8> [[ADD_I]]
// vmlaq_u8 (128-bit): CHECK lines above pin lowering to mul + add on <16 x i8>.
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlaq_u8(a, b, c);
}
7038
7039 // CHECK-LABEL: @test_vmlaq_u16(
7040 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7041 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7042 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlaq_u16 (128-bit): CHECK lines above pin lowering to mul + add on <8 x i16>.
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlaq_u16(a, b, c);
}
7046
7047 // CHECK-LABEL: @test_vmlaq_u32(
7048 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7049 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7050 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlaq_u32 (128-bit): CHECK lines above pin lowering to mul + add on <4 x i32>.
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlaq_u32(a, b, c);
}
7054
7055 // CHECK-LABEL: @test_vmlal_s8(
7056 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
7057 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7058 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlal_s8 (widening MLA): CHECK lines above pin @llvm.arm.neon.vmulls.v8i16 + add.
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
7062
7063 // CHECK-LABEL: @test_vmlal_s16(
7064 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7065 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7066 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
7067 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7068 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlal_s16 (widening MLA): CHECK lines above pin @llvm.arm.neon.vmulls.v4i32 + add.
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
7072
7073 // CHECK-LABEL: @test_vmlal_s32(
7074 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7075 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7076 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
7077 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7078 // CHECK: ret <2 x i64> [[ADD_I]]
// vmlal_s32 (widening MLA): CHECK lines above pin @llvm.arm.neon.vmulls.v2i64 + add.
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
7082
7083 // CHECK-LABEL: @test_vmlal_u8(
7084 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
7085 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7086 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlal_u8 (widening MLA): CHECK lines above pin @llvm.arm.neon.vmullu.v8i16 + add.
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
7090
7091 // CHECK-LABEL: @test_vmlal_u16(
7092 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7093 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7094 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
7095 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7096 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlal_u16 (widening MLA): CHECK lines above pin @llvm.arm.neon.vmullu.v4i32 + add.
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
7100
7101 // CHECK-LABEL: @test_vmlal_u32(
7102 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7103 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7104 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
7105 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7106 // CHECK: ret <2 x i64> [[ADD_I]]
// vmlal_u32 (widening MLA): CHECK lines above pin @llvm.arm.neon.vmullu.v2i64 + add.
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
7110
7111 // CHECK-LABEL: @test_vmlal_lane_s16(
7112 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7113 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7114 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7115 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7116 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
7117 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
7118 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
7119 // CHECK: ret <4 x i32> [[ADD]]
// vmlal_lane_s16, lane 3: CHECK lines above pin a shufflevector lane splat
// followed by @llvm.arm.neon.vmulls.v4i32 + add.
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_lane_s16(a, b, c, 3);
}
7123
7124 // CHECK-LABEL: @test_vmlal_lane_s32(
7125 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7126 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7127 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7128 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7129 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
7130 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
7131 // CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
7132 // CHECK: ret <2 x i64> [[ADD]]
// vmlal_lane_s32, lane 1: CHECK lines above pin a shufflevector lane splat
// followed by @llvm.arm.neon.vmulls.v2i64 + add.
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_lane_s32(a, b, c, 1);
}
7136
7137 // CHECK-LABEL: @test_vmlal_lane_u16(
7138 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7139 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7140 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7141 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7142 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
7143 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
7144 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
7145 // CHECK: ret <4 x i32> [[ADD]]
// vmlal_lane_u16, lane 3: CHECK lines above pin a shufflevector lane splat
// followed by @llvm.arm.neon.vmullu.v4i32 + add.
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_lane_u16(a, b, c, 3);
}
7149
7150 // CHECK-LABEL: @test_vmlal_lane_u32(
7151 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7152 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7153 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7154 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7155 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
7156 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
7157 // CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
7158 // CHECK: ret <2 x i64> [[ADD]]
// vmlal_lane_u32, lane 1: CHECK lines above pin a shufflevector lane splat
// followed by @llvm.arm.neon.vmullu.v2i64 + add.
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_lane_u32(a, b, c, 1);
}
7162
7163 // CHECK-LABEL: @test_vmlal_n_s16(
7164 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7165 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7166 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7167 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7168 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7169 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7170 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7171 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7172 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlal_n_s16 (scalar operand): CHECK lines above pin an insertelement splat
// of %c followed by @llvm.arm.neon.vmulls.v4i32 + add.
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlal_n_s16(a, b, c);
}
7176
7177 // CHECK-LABEL: @test_vmlal_n_s32(
7178 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7179 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7180 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7181 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7182 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7183 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7184 // CHECK: ret <2 x i64> [[ADD_I]]
// vmlal_n_s32 (scalar operand): CHECK lines above pin an insertelement splat
// of %c followed by @llvm.arm.neon.vmulls.v2i64 + add.
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlal_n_s32(a, b, c);
}
7188
7189 // CHECK-LABEL: @test_vmlal_n_u16(
7190 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7191 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7192 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7193 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7194 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7195 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7196 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7197 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7198 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlal_n_u16 (scalar operand): CHECK lines above pin an insertelement splat
// of %c followed by @llvm.arm.neon.vmullu.v4i32 + add.
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlal_n_u16(a, b, c);
}
7202
7203 // CHECK-LABEL: @test_vmlal_n_u32(
7204 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7205 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7206 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7207 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7208 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7209 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7210 // CHECK: ret <2 x i64> [[ADD_I]]
// vmlal_n_u32 (scalar operand): CHECK lines above pin an insertelement splat
// of %c followed by @llvm.arm.neon.vmullu.v2i64 + add.
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_n_u32(a, b, c);
}
7214
7215 // CHECK-LABEL: @test_vmla_lane_s16(
7216 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7217 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7218 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7219 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7220 // CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
7221 // CHECK: ret <4 x i16> [[ADD]]
// vmla_lane_s16, lane 3: CHECK lines above pin shufflevector lane splat + mul + add.
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_lane_s16(a, b, c, 3);
}
7225
7226 // CHECK-LABEL: @test_vmla_lane_s32(
7227 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7228 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7229 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7230 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7231 // CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
7232 // CHECK: ret <2 x i32> [[ADD]]
// vmla_lane_s32, lane 1: CHECK lines above pin shufflevector lane splat + mul + add.
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_lane_s32(a, b, c, 1);
}
7236
7237 // CHECK-LABEL: @test_vmla_lane_u16(
7238 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7239 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7240 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7241 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7242 // CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
7243 // CHECK: ret <4 x i16> [[ADD]]
// vmla_lane_u16, lane 3: CHECK lines above pin shufflevector lane splat + mul + add.
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_lane_u16(a, b, c, 3);
}
7247
7248 // CHECK-LABEL: @test_vmla_lane_u32(
7249 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7250 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7251 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7252 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7253 // CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
7254 // CHECK: ret <2 x i32> [[ADD]]
// vmla_lane_u32, lane 1: CHECK lines above pin shufflevector lane splat + mul + add.
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_lane_u32(a, b, c, 1);
}
7258
7259 // CHECK-LABEL: @test_vmla_lane_f32(
7260 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7261 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7262 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
7263 // CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
7264 // CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
7265 // CHECK: ret <2 x float> [[ADD]]
// vmla_lane_f32, lane 1: CHECK lines above pin shufflevector lane splat + fmul + fadd.
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_lane_f32(a, b, c, 1);
}
7269
7270 // CHECK-LABEL: @test_vmlaq_lane_s16(
7271 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7272 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7273 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7274 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7275 // CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
7276 // CHECK: ret <8 x i16> [[ADD]]
// vmlaq_lane_s16, lane 3: CHECK lines above pin a 64-bit-to-128-bit lane splat
// (shufflevector to <8 x i32> mask) + mul + add.
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlaq_lane_s16(a, b, c, 3);
}
7280
7281 // CHECK-LABEL: @test_vmlaq_lane_s32(
7282 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7283 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7284 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7285 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7286 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
7287 // CHECK: ret <4 x i32> [[ADD]]
// vmlaq_lane_s32, lane 1: CHECK lines above pin a 64-bit-to-128-bit lane splat
// (shufflevector to <4 x i32> mask) + mul + add.
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlaq_lane_s32(a, b, c, 1);
}
7291
7292 // CHECK-LABEL: @test_vmlaq_lane_u16(
7293 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7294 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7295 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7296 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7297 // CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
7298 // CHECK: ret <8 x i16> [[ADD]]
// vmlaq_lane_u16, lane 3: CHECK lines above pin a 64-bit-to-128-bit lane splat + mul + add.
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlaq_lane_u16(a, b, c, 3);
}
7302
7303 // CHECK-LABEL: @test_vmlaq_lane_u32(
7304 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7305 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7306 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7307 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7308 // CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
7309 // CHECK: ret <4 x i32> [[ADD]]
// vmlaq_lane_u32, lane 1: CHECK lines above pin a 64-bit-to-128-bit lane splat + mul + add.
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlaq_lane_u32(a, b, c, 1);
}
7313
7314 // CHECK-LABEL: @test_vmlaq_lane_f32(
7315 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7316 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7317 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7318 // CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
7319 // CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
7320 // CHECK: ret <4 x float> [[ADD]]
// vmlaq_lane_f32, lane 1: CHECK lines above pin a 64-bit-to-128-bit lane splat + fmul + fadd.
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlaq_lane_f32(a, b, c, 1);
}
7324
7325 // CHECK-LABEL: @test_vmla_n_s16(
7326 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7327 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7328 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7329 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7330 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7331 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
7332 // CHECK: ret <4 x i16> [[ADD_I]]
// vmla_n_s16 (scalar operand): CHECK lines above pin an insertelement splat of %c + mul + add.
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmla_n_s16(a, b, c);
}
7336
7337 // CHECK-LABEL: @test_vmla_n_s32(
7338 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7339 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7340 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7341 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
7342 // CHECK: ret <2 x i32> [[ADD_I]]
// vmla_n_s32 (scalar operand): CHECK lines above pin an insertelement splat of %c + mul + add.
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmla_n_s32(a, b, c);
}
7346
7347 // CHECK-LABEL: @test_vmla_n_u16(
7348 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7349 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7350 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7351 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7352 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7353 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
7354 // CHECK: ret <4 x i16> [[ADD_I]]
// vmla_n_u16 (scalar operand): CHECK lines above pin an insertelement splat of %c + mul + add.
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmla_n_u16(a, b, c);
}
7358
7359 // CHECK-LABEL: @test_vmla_n_u32(
7360 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7361 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7362 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7363 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
7364 // CHECK: ret <2 x i32> [[ADD_I]]
// vmla_n_u32 (scalar operand): CHECK lines above pin an insertelement splat of %c + mul + add.
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmla_n_u32(a, b, c);
}
7368
7369 // CHECK-LABEL: @test_vmla_n_f32(
7370 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
7371 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
7372 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
7373 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
7374 // CHECK: ret <2 x float> [[ADD_I]]
// vmla_n_f32 (scalar operand): CHECK lines above pin an insertelement splat of %c + fmul + fadd.
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}
7378
7379 // CHECK-LABEL: @test_vmlaq_n_s16(
7380 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7381 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7382 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7383 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7384 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7385 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7386 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7387 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7388 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7389 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7390 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlaq_n_s16 (128-bit, scalar operand): CHECK lines above pin an 8-element
// insertelement splat of %c + mul + add.
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlaq_n_s16(a, b, c);
}
7394
7395 // CHECK-LABEL: @test_vmlaq_n_s32(
7396 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7397 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7398 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7399 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7400 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7401 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7402 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlaq_n_s32 (128-bit, scalar operand): CHECK lines above pin a 4-element
// insertelement splat of %c + mul + add.
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlaq_n_s32(a, b, c);
}
7406
7407 // CHECK-LABEL: @test_vmlaq_n_u16(
7408 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7409 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7410 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7411 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7412 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7413 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7414 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7415 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7416 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7417 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
7418 // CHECK: ret <8 x i16> [[ADD_I]]
// vmlaq_n_u16 (128-bit, scalar operand): CHECK lines above pin an 8-element
// insertelement splat of %c + mul + add.
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlaq_n_u16(a, b, c);
}
7422
7423 // CHECK-LABEL: @test_vmlaq_n_u32(
7424 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7425 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7426 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7427 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7428 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7429 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
7430 // CHECK: ret <4 x i32> [[ADD_I]]
// vmlaq_n_u32 (128-bit, scalar operand): CHECK lines above pin a 4-element
// insertelement splat of %c + mul + add.
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlaq_n_u32(a, b, c);
}
7434
7435 // CHECK-LABEL: @test_vmlaq_n_f32(
7436 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
7437 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
7438 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
7439 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
7440 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
7441 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
7442 // CHECK: ret <4 x float> [[ADD_I]]
// Codegen test for vmlaq_n_f32 (splat c, fmul, fadd); expected IR is pinned by the CHECK lines above.
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}
7446
7447 // CHECK-LABEL: @test_vmls_s8(
7448 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
7449 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
7450 // CHECK: ret <8 x i8> [[SUB_I]]
// Codegen test for vmls_s8 (multiply-subtract: mul then sub); expected IR is pinned by the CHECK lines above.
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmls_s8(a, b, c);
}
7454
7455 // CHECK-LABEL: @test_vmls_s16(
7456 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
7457 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7458 // CHECK: ret <4 x i16> [[SUB_I]]
// Codegen test for vmls_s16 (mul then sub); expected IR is pinned by the CHECK lines above.
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_s16(a, b, c);
}
7462
7463 // CHECK-LABEL: @test_vmls_s32(
7464 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
7465 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7466 // CHECK: ret <2 x i32> [[SUB_I]]
// Codegen test for vmls_s32 (mul then sub); expected IR is pinned by the CHECK lines above.
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_s32(a, b, c);
}
7470
7471 // CHECK-LABEL: @test_vmls_f32(
7472 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
7473 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
7474 // CHECK: ret <2 x float> [[SUB_I]]
// Codegen test for vmls_f32 (fmul then fsub); expected IR is pinned by the CHECK lines above.
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_f32(a, b, c);
}
7478
7479 // CHECK-LABEL: @test_vmls_u8(
7480 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
7481 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
7482 // CHECK: ret <8 x i8> [[SUB_I]]
// Codegen test for vmls_u8 (mul then sub); expected IR is pinned by the CHECK lines above.
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmls_u8(a, b, c);
}
7486
7487 // CHECK-LABEL: @test_vmls_u16(
7488 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
7489 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7490 // CHECK: ret <4 x i16> [[SUB_I]]
// Codegen test for vmls_u16 (mul then sub); expected IR is pinned by the CHECK lines above.
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_u16(a, b, c);
}
7494
7495 // CHECK-LABEL: @test_vmls_u32(
7496 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
7497 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7498 // CHECK: ret <2 x i32> [[SUB_I]]
// Codegen test for vmls_u32 (mul then sub); expected IR is pinned by the CHECK lines above.
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_u32(a, b, c);
}
7502
7503 // CHECK-LABEL: @test_vmlsq_s8(
7504 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7505 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
7506 // CHECK: ret <16 x i8> [[SUB_I]]
// Codegen test for vmlsq_s8 (128-bit mul then sub); expected IR is pinned by the CHECK lines above.
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlsq_s8(a, b, c);
}
7510
7511 // CHECK-LABEL: @test_vmlsq_s16(
7512 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7513 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7514 // CHECK: ret <8 x i16> [[SUB_I]]
// Codegen test for vmlsq_s16 (128-bit mul then sub); expected IR is pinned by the CHECK lines above.
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlsq_s16(a, b, c);
}
7518
7519 // CHECK-LABEL: @test_vmlsq_s32(
7520 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7521 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7522 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsq_s32 (128-bit mul then sub); expected IR is pinned by the CHECK lines above.
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlsq_s32(a, b, c);
}
7526
7527 // CHECK-LABEL: @test_vmlsq_f32(
7528 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
7529 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
7530 // CHECK: ret <4 x float> [[SUB_I]]
// Codegen test for vmlsq_f32 (128-bit fmul then fsub); expected IR is pinned by the CHECK lines above.
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlsq_f32(a, b, c);
}
7534
7535 // CHECK-LABEL: @test_vmlsq_u8(
7536 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
7537 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
7538 // CHECK: ret <16 x i8> [[SUB_I]]
// Codegen test for vmlsq_u8 (128-bit mul then sub); expected IR is pinned by the CHECK lines above.
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsq_u8(a, b, c);
}
7542
7543 // CHECK-LABEL: @test_vmlsq_u16(
7544 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
7545 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7546 // CHECK: ret <8 x i16> [[SUB_I]]
// Codegen test for vmlsq_u16 (128-bit mul then sub); expected IR is pinned by the CHECK lines above.
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsq_u16(a, b, c);
}
7550
7551 // CHECK-LABEL: @test_vmlsq_u32(
7552 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
7553 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7554 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsq_u32 (128-bit mul then sub); expected IR is pinned by the CHECK lines above.
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsq_u32(a, b, c);
}
7558
7559 // CHECK-LABEL: @test_vmlsl_s8(
7560 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
7561 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
7562 // CHECK: ret <8 x i16> [[SUB_I]]
// Codegen test for vmlsl_s8 (widening multiply-subtract via @llvm.arm.neon.vmulls); IR pinned by the CHECK lines above.
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
7566
7567 // CHECK-LABEL: @test_vmlsl_s16(
7568 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7569 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7570 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
7571 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7572 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsl_s16 (widening multiply-subtract via @llvm.arm.neon.vmulls); IR pinned by the CHECK lines above.
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
7576
7577 // CHECK-LABEL: @test_vmlsl_s32(
7578 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7579 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7580 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
7581 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7582 // CHECK: ret <2 x i64> [[SUB_I]]
// Codegen test for vmlsl_s32 (widening multiply-subtract via @llvm.arm.neon.vmulls); IR pinned by the CHECK lines above.
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
7586
7587 // CHECK-LABEL: @test_vmlsl_u8(
7588 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
7589 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
7590 // CHECK: ret <8 x i16> [[SUB_I]]
// Codegen test for vmlsl_u8 (widening multiply-subtract via @llvm.arm.neon.vmullu); IR pinned by the CHECK lines above.
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
7594
7595 // CHECK-LABEL: @test_vmlsl_u16(
7596 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7597 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7598 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
7599 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7600 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsl_u16 (widening multiply-subtract via @llvm.arm.neon.vmullu); IR pinned by the CHECK lines above.
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
7604
7605 // CHECK-LABEL: @test_vmlsl_u32(
7606 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7607 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7608 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
7609 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7610 // CHECK: ret <2 x i64> [[SUB_I]]
// Codegen test for vmlsl_u32 (widening multiply-subtract via @llvm.arm.neon.vmullu); IR pinned by the CHECK lines above.
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
7614
7615 // CHECK-LABEL: @test_vmlsl_lane_s16(
7616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7617 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7618 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7619 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7620 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
7622 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
7623 // CHECK: ret <4 x i32> [[SUB]]
// Codegen test for vmlsl_lane_s16 with lane 3 (shufflevector splat of lane, then vmulls + sub); IR pinned above.
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_lane_s16(a, b, c, 3);
}
7627
7628 // CHECK-LABEL: @test_vmlsl_lane_s32(
7629 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7630 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7631 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7632 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7633 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
7635 // CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
7636 // CHECK: ret <2 x i64> [[SUB]]
// Codegen test for vmlsl_lane_s32 with lane 1 (shufflevector splat of lane, then vmulls + sub); IR pinned above.
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_lane_s32(a, b, c, 1);
}
7640
7641 // CHECK-LABEL: @test_vmlsl_lane_u16(
7642 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7643 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7644 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7645 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
7646 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
7648 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
7649 // CHECK: ret <4 x i32> [[SUB]]
// Codegen test for vmlsl_lane_u16 with lane 3 (shufflevector splat of lane, then vmullu + sub); IR pinned above.
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_lane_u16(a, b, c, 3);
}
7653
7654 // CHECK-LABEL: @test_vmlsl_lane_u32(
7655 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7656 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7657 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7658 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
7659 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
7661 // CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
7662 // CHECK: ret <2 x i64> [[SUB]]
// Codegen test for vmlsl_lane_u32 with lane 1 (shufflevector splat of lane, then vmullu + sub); IR pinned above.
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_lane_u32(a, b, c, 1);
}
7666
7667 // CHECK-LABEL: @test_vmlsl_n_s16(
7668 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7669 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7670 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7671 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7672 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7673 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7674 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7675 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7676 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsl_n_s16 (splat scalar c, widening vmulls, then sub); IR pinned by the CHECK lines above.
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlsl_n_s16(a, b, c);
}
7680
7681 // CHECK-LABEL: @test_vmlsl_n_s32(
7682 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7683 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7684 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7685 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7686 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7687 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7688 // CHECK: ret <2 x i64> [[SUB_I]]
// Codegen test for vmlsl_n_s32 (splat scalar c, widening vmulls, then sub); IR pinned by the CHECK lines above.
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlsl_n_s32(a, b, c);
}
7692
7693 // CHECK-LABEL: @test_vmlsl_n_u16(
7694 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7695 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7696 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7697 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7698 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7699 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
7700 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
7701 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
7702 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsl_n_u16 (splat scalar c, widening vmullu, then sub); IR pinned by the CHECK lines above.
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlsl_n_u16(a, b, c);
}
7706
7707 // CHECK-LABEL: @test_vmlsl_n_u32(
7708 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7709 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7710 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7711 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
7712 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
7713 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
7714 // CHECK: ret <2 x i64> [[SUB_I]]
// Codegen test for vmlsl_n_u32 (splat scalar c, widening vmullu, then sub); IR pinned by the CHECK lines above.
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlsl_n_u32(a, b, c);
}
7718
7719 // CHECK-LABEL: @test_vmls_lane_s16(
7720 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7721 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7722 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7723 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7724 // CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
7725 // CHECK: ret <4 x i16> [[SUB]]
// Codegen test for vmls_lane_s16 with lane 3 (splat lane, mul, sub); IR pinned by the CHECK lines above.
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_lane_s16(a, b, c, 3);
}
7729
7730 // CHECK-LABEL: @test_vmls_lane_s32(
7731 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7732 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7733 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7734 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7735 // CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
7736 // CHECK: ret <2 x i32> [[SUB]]
// Codegen test for vmls_lane_s32 with lane 1 (splat lane, mul, sub); IR pinned by the CHECK lines above.
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_lane_s32(a, b, c, 1);
}
7740
7741 // CHECK-LABEL: @test_vmls_lane_u16(
7742 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7743 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7744 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
7745 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
7746 // CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
7747 // CHECK: ret <4 x i16> [[SUB]]
// Codegen test for vmls_lane_u16 with lane 3 (splat lane, mul, sub); IR pinned by the CHECK lines above.
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_lane_u16(a, b, c, 3);
}
7751
7752 // CHECK-LABEL: @test_vmls_lane_u32(
7753 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7754 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7755 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
7756 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
7757 // CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
7758 // CHECK: ret <2 x i32> [[SUB]]
// Codegen test for vmls_lane_u32 with lane 1 (splat lane, mul, sub); IR pinned by the CHECK lines above.
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_lane_u32(a, b, c, 1);
}
7762
7763 // CHECK-LABEL: @test_vmls_lane_f32(
7764 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7765 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7766 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
7767 // CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
7768 // CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
7769 // CHECK: ret <2 x float> [[SUB]]
// Codegen test for vmls_lane_f32 with lane 1 (splat lane, fmul, fsub); IR pinned by the CHECK lines above.
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_lane_f32(a, b, c, 1);
}
7773
7774 // CHECK-LABEL: @test_vmlsq_lane_s16(
7775 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7776 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7777 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7778 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7779 // CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
7780 // CHECK: ret <8 x i16> [[SUB]]
// Codegen test for vmlsq_lane_s16 with lane 3 (64-bit lane splat widened to 8 elements, mul, sub); IR pinned above.
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlsq_lane_s16(a, b, c, 3);
}
7784
7785 // CHECK-LABEL: @test_vmlsq_lane_s32(
7786 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7787 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7788 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7789 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7790 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
7791 // CHECK: ret <4 x i32> [[SUB]]
// Codegen test for vmlsq_lane_s32 with lane 1 (64-bit lane splat widened to 4 elements, mul, sub); IR pinned above.
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlsq_lane_s32(a, b, c, 1);
}
7795
7796 // CHECK-LABEL: @test_vmlsq_lane_u16(
7797 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
7798 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7799 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
7800 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
7801 // CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
7802 // CHECK: ret <8 x i16> [[SUB]]
// Codegen test for vmlsq_lane_u16 with lane 3 (64-bit lane splat widened to 8 elements, mul, sub); IR pinned above.
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlsq_lane_u16(a, b, c, 3);
}
7806
7807 // CHECK-LABEL: @test_vmlsq_lane_u32(
7808 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
7809 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7810 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7811 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
7812 // CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
7813 // CHECK: ret <4 x i32> [[SUB]]
// Codegen test for vmlsq_lane_u32 with lane 1 (64-bit lane splat widened to 4 elements, mul, sub); IR pinned above.
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlsq_lane_u32(a, b, c, 1);
}
7817
7818 // CHECK-LABEL: @test_vmlsq_lane_f32(
7819 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
7820 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7821 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
7822 // CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
7823 // CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
7824 // CHECK: ret <4 x float> [[SUB]]
// Codegen test for vmlsq_lane_f32 with lane 1 (64-bit lane splat widened to 4 elements, fmul, fsub); IR pinned above.
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlsq_lane_f32(a, b, c, 1);
}
7828
7829 // CHECK-LABEL: @test_vmls_n_s16(
7830 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7831 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7832 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7833 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7834 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7835 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7836 // CHECK: ret <4 x i16> [[SUB_I]]
// Codegen test for vmls_n_s16 (splat scalar c, mul, sub); expected IR is pinned by the CHECK lines above.
int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmls_n_s16(a, b, c);
}
7840
7841 // CHECK-LABEL: @test_vmls_n_s32(
7842 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7843 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7844 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7845 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7846 // CHECK: ret <2 x i32> [[SUB_I]]
// Codegen test for vmls_n_s32 (splat scalar c, mul, sub); expected IR is pinned by the CHECK lines above.
int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmls_n_s32(a, b, c);
}
7850
7851 // CHECK-LABEL: @test_vmls_n_u16(
7852 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7853 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7854 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7855 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7856 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7857 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7858 // CHECK: ret <4 x i16> [[SUB_I]]
// Codegen test for vmls_n_u16 (splat scalar c, mul, sub); expected IR is pinned by the CHECK lines above.
uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmls_n_u16(a, b, c);
}
7862
7863 // CHECK-LABEL: @test_vmls_n_u32(
7864 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7865 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7866 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7867 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7868 // CHECK: ret <2 x i32> [[SUB_I]]
// Codegen test for vmls_n_u32 (splat scalar c, mul, sub); expected IR is pinned by the CHECK lines above.
uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmls_n_u32(a, b, c);
}
7872
7873 // CHECK-LABEL: @test_vmls_n_f32(
7874 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
7875 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
7876 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
7877 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
7878 // CHECK: ret <2 x float> [[SUB_I]]
// Codegen test for vmls_n_f32 (splat scalar c, fmul, fsub); expected IR is pinned by the CHECK lines above.
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}
7882
7883 // CHECK-LABEL: @test_vmlsq_n_s16(
7884 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7885 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7886 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7887 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7888 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7889 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7890 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7891 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7892 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7893 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7894 // CHECK: ret <8 x i16> [[SUB_I]]
// Codegen test for vmlsq_n_s16 (splat scalar c across 8 lanes, mul, sub); IR pinned by the CHECK lines above.
int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlsq_n_s16(a, b, c);
}
7898
7899 // CHECK-LABEL: @test_vmlsq_n_s32(
7900 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7901 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7902 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7903 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7904 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7905 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7906 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsq_n_s32 (splat scalar c across 4 lanes, mul, sub); IR pinned by the CHECK lines above.
int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlsq_n_s32(a, b, c);
}
7910
7911 // CHECK-LABEL: @test_vmlsq_n_u16(
7912 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7913 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7914 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7915 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7916 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7917 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7918 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7919 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7920 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7921 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7922 // CHECK: ret <8 x i16> [[SUB_I]]
// Codegen test for vmlsq_n_u16 (splat scalar c across 8 lanes, mul, sub); IR pinned by the CHECK lines above.
uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlsq_n_u16(a, b, c);
}
7926
7927 // CHECK-LABEL: @test_vmlsq_n_u32(
7928 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7929 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7930 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7931 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7932 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7933 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7934 // CHECK: ret <4 x i32> [[SUB_I]]
// Codegen test for vmlsq_n_u32 (splat scalar c across 4 lanes, mul, sub); IR pinned by the CHECK lines above.
uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlsq_n_u32(a, b, c);
}
7938
7939 // CHECK-LABEL: @test_vmlsq_n_f32(
7940 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
7941 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
7942 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
7943 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
7944 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
7945 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
7946 // CHECK: ret <4 x float> [[SUB_I]]
// Codegen test for vmlsq_n_f32 (splat scalar c across 4 lanes, fmul, fsub); IR pinned by the CHECK lines above.
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}
7950
7951 // CHECK-LABEL: @test_vmovl_s8(
7952 // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7953 // CHECK: ret <8 x i16> [[VMOVL_I]]
// Codegen test for vmovl_s8: widening move must lower to a plain sext; IR pinned by the CHECK lines above.
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}
7957
7958 // CHECK-LABEL: @test_vmovl_s16(
7959 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7960 // CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
7961 // CHECK: ret <4 x i32> [[VMOVL_I]]
// Codegen test for vmovl_s16: widening move must lower to a plain sext; IR pinned by the CHECK lines above.
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}
7965
7966 // CHECK-LABEL: @test_vmovl_s32(
7967 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7968 // CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
7969 // CHECK: ret <2 x i64> [[VMOVL_I]]
// Codegen test for vmovl_s32: widening move must lower to a plain sext; IR pinned by the CHECK lines above.
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}
7973
7974 // CHECK-LABEL: @test_vmovl_u8(
7975 // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7976 // CHECK: ret <8 x i16> [[VMOVL_I]]
// Codegen test for vmovl_u8: widening move must lower to a plain zext; IR pinned by the CHECK lines above.
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}
7980
7981 // CHECK-LABEL: @test_vmovl_u16(
7982 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7983 // CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
7984 // CHECK: ret <4 x i32> [[VMOVL_I]]
// Codegen test for vmovl_u16: widening move must lower to a plain zext; IR pinned by the CHECK lines above.
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}
7988
7989 // CHECK-LABEL: @test_vmovl_u32(
7990 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7991 // CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
7992 // CHECK: ret <2 x i64> [[VMOVL_I]]
// Codegen test for vmovl_u32: widening move must lower to a plain zext; IR pinned by the CHECK lines above.
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}
7996
7997 // CHECK-LABEL: @test_vmovn_s16(
7998 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7999 // CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
8000 // CHECK: ret <8 x i8> [[VMOVN_I]]
// Codegen test for vmovn_s16: narrowing move must lower to a plain trunc; IR pinned by the CHECK lines above.
int8x8_t test_vmovn_s16(int16x8_t a) {
  return vmovn_s16(a);
}
8004
8005 // CHECK-LABEL: @test_vmovn_s32(
8006 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8007 // CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
8008 // CHECK: ret <4 x i16> [[VMOVN_I]]
// Codegen test for vmovn_s32: narrowing move must lower to a plain trunc; IR pinned by the CHECK lines above.
int16x4_t test_vmovn_s32(int32x4_t a) {
  return vmovn_s32(a);
}
8012
8013 // CHECK-LABEL: @test_vmovn_s64(
8014 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8015 // CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
8016 // CHECK: ret <2 x i32> [[VMOVN_I]]
// Codegen test for vmovn_s64: narrowing move must lower to a plain trunc; IR pinned by the CHECK lines above.
int32x2_t test_vmovn_s64(int64x2_t a) {
  return vmovn_s64(a);
}
8020
8021 // CHECK-LABEL: @test_vmovn_u16(
8022 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8023 // CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
8024 // CHECK: ret <8 x i8> [[VMOVN_I]]
// Codegen test for vmovn_u16: narrowing move must lower to a plain trunc; IR pinned by the CHECK lines above.
uint8x8_t test_vmovn_u16(uint16x8_t a) {
  return vmovn_u16(a);
}
8028
8029 // CHECK-LABEL: @test_vmovn_u32(
8030 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8031 // CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
8032 // CHECK: ret <4 x i16> [[VMOVN_I]]
// vmovn_u32: narrow each u32 lane to u16 (trunc in IR, per CHECK above).
uint16x4_t test_vmovn_u32(uint32x4_t a) {
  return vmovn_u32(a);
}
8036
8037 // CHECK-LABEL: @test_vmovn_u64(
8038 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8039 // CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
8040 // CHECK: ret <2 x i32> [[VMOVN_I]]
// vmovn_u64: narrow each u64 lane to u32 (trunc in IR, per CHECK above).
uint32x2_t test_vmovn_u64(uint64x2_t a) {
  return vmovn_u64(a);
}
8044
8045 // CHECK-LABEL: @test_vmov_n_u8(
8046 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8047 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8048 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8049 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8050 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8051 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8052 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8053 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8054 // CHECK: ret <8 x i8> [[VECINIT7_I]]
// vmov_n_u8: splat a scalar into all 8 lanes (chain of insertelement, per CHECK above).
uint8x8_t test_vmov_n_u8(uint8_t a) {
  return vmov_n_u8(a);
}
8058
8059 // CHECK-LABEL: @test_vmov_n_u16(
8060 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8061 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8062 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8063 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8064 // CHECK: ret <4 x i16> [[VECINIT3_I]]
// vmov_n_u16: splat a scalar into all 4 lanes via insertelement chain.
uint16x4_t test_vmov_n_u16(uint16_t a) {
  return vmov_n_u16(a);
}
8068
8069 // CHECK-LABEL: @test_vmov_n_u32(
8070 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
8071 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
8072 // CHECK: ret <2 x i32> [[VECINIT1_I]]
// vmov_n_u32: splat a scalar into both lanes via insertelement chain.
uint32x2_t test_vmov_n_u32(uint32_t a) {
  return vmov_n_u32(a);
}
8076
8077 // CHECK-LABEL: @test_vmov_n_s8(
8078 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8079 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8080 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8081 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8082 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8083 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8084 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8085 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8086 // CHECK: ret <8 x i8> [[VECINIT7_I]]
// vmov_n_s8: signed splat; identical insertelement lowering as the unsigned form.
int8x8_t test_vmov_n_s8(int8_t a) {
  return vmov_n_s8(a);
}
8090
8091 // CHECK-LABEL: @test_vmov_n_s16(
8092 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8093 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8094 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8095 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8096 // CHECK: ret <4 x i16> [[VECINIT3_I]]
// vmov_n_s16: splat a scalar into all 4 lanes via insertelement chain.
int16x4_t test_vmov_n_s16(int16_t a) {
  return vmov_n_s16(a);
}
8100
8101 // CHECK-LABEL: @test_vmov_n_s32(
8102 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
8103 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
8104 // CHECK: ret <2 x i32> [[VECINIT1_I]]
// vmov_n_s32: splat a scalar into both lanes via insertelement chain.
int32x2_t test_vmov_n_s32(int32_t a) {
  return vmov_n_s32(a);
}
8108
8109 // CHECK-LABEL: @test_vmov_n_p8(
8110 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8111 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8112 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8113 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8114 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8115 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8116 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8117 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8118 // CHECK: ret <8 x i8> [[VECINIT7_I]]
// vmov_n_p8: polynomial splat; same <8 x i8> insertelement lowering as u8/s8.
poly8x8_t test_vmov_n_p8(poly8_t a) {
  return vmov_n_p8(a);
}
8122
8123 // CHECK-LABEL: @test_vmov_n_p16(
8124 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8125 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8126 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8127 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8128 // CHECK: ret <4 x i16> [[VECINIT3_I]]
// vmov_n_p16: polynomial splat; same <4 x i16> insertelement lowering as u16/s16.
poly16x4_t test_vmov_n_p16(poly16_t a) {
  return vmov_n_p16(a);
}
8132
8133 // CHECK-LABEL: @test_vmov_n_f16(
8134 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
8135 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
8136 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
8137 // CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
8138 // CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
8139 // CHECK: ret <4 x half> [[VECINIT3]]
// vmov_n_f16: the half value is passed through a pointer and loaded first
// (see the `load half` CHECK above), then splatted into all 4 lanes.
float16x4_t test_vmov_n_f16(float16_t *a) {
  return vmov_n_f16(*a);
}
8143
8144 // CHECK-LABEL: @test_vmov_n_f32(
8145 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
8146 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
8147 // CHECK: ret <2 x float> [[VECINIT1_I]]
// vmov_n_f32: splat a float into both lanes via insertelement chain.
float32x2_t test_vmov_n_f32(float32_t a) {
  return vmov_n_f32(a);
}
8151
8152 // CHECK-LABEL: @test_vmovq_n_u8(
8153 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8154 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8155 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8156 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8157 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8158 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8159 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8160 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8161 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8162 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8163 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8164 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8165 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8166 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8167 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8168 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8169 // CHECK: ret <16 x i8> [[VECINIT15_I]]
// vmovq_n_u8: 128-bit splat — 16 insertelement steps (per CHECK above).
uint8x16_t test_vmovq_n_u8(uint8_t a) {
  return vmovq_n_u8(a);
}
8173
8174 // CHECK-LABEL: @test_vmovq_n_u16(
8175 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8176 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8177 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8178 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8179 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8180 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8181 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8182 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8183 // CHECK: ret <8 x i16> [[VECINIT7_I]]
// vmovq_n_u16: 128-bit splat into 8 lanes via insertelement chain.
uint16x8_t test_vmovq_n_u16(uint16_t a) {
  return vmovq_n_u16(a);
}
8187
8188 // CHECK-LABEL: @test_vmovq_n_u32(
8189 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
8190 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
8191 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
8192 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
8193 // CHECK: ret <4 x i32> [[VECINIT3_I]]
// vmovq_n_u32: 128-bit splat into 4 lanes via insertelement chain.
uint32x4_t test_vmovq_n_u32(uint32_t a) {
  return vmovq_n_u32(a);
}
8197
8198 // CHECK-LABEL: @test_vmovq_n_s8(
8199 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8200 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8201 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8202 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8203 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8204 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8205 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8206 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8207 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8208 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8209 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8210 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8211 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8212 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8213 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8214 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8215 // CHECK: ret <16 x i8> [[VECINIT15_I]]
// vmovq_n_s8: signed 128-bit splat; same 16-step insertelement lowering.
int8x16_t test_vmovq_n_s8(int8_t a) {
  return vmovq_n_s8(a);
}
8219
8220 // CHECK-LABEL: @test_vmovq_n_s16(
8221 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8222 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8223 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8224 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8225 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8226 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8227 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8228 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8229 // CHECK: ret <8 x i16> [[VECINIT7_I]]
// vmovq_n_s16: 128-bit splat into 8 lanes via insertelement chain.
int16x8_t test_vmovq_n_s16(int16_t a) {
  return vmovq_n_s16(a);
}
8233
8234 // CHECK-LABEL: @test_vmovq_n_s32(
8235 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
8236 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
8237 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
8238 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
8239 // CHECK: ret <4 x i32> [[VECINIT3_I]]
// vmovq_n_s32: 128-bit splat into 4 lanes via insertelement chain.
int32x4_t test_vmovq_n_s32(int32_t a) {
  return vmovq_n_s32(a);
}
8243
8244 // CHECK-LABEL: @test_vmovq_n_p8(
8245 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8246 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8247 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8248 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8249 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8250 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8251 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8252 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8253 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8254 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8255 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8256 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8257 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8258 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8259 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8260 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8261 // CHECK: ret <16 x i8> [[VECINIT15_I]]
// vmovq_n_p8: polynomial 128-bit splat; same <16 x i8> lowering as u8/s8.
poly8x16_t test_vmovq_n_p8(poly8_t a) {
  return vmovq_n_p8(a);
}
8265
8266 // CHECK-LABEL: @test_vmovq_n_p16(
8267 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8268 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8269 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8270 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8271 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8272 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8273 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8274 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8275 // CHECK: ret <8 x i16> [[VECINIT7_I]]
// vmovq_n_p16: polynomial 128-bit splat; same <8 x i16> lowering as u16/s16.
poly16x8_t test_vmovq_n_p16(poly16_t a) {
  return vmovq_n_p16(a);
}
8279
8280 // CHECK-LABEL: @test_vmovq_n_f16(
8281 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
8282 // CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
8283 // CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
8284 // CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
8285 // CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
8286 // CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
8287 // CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
8288 // CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
8289 // CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
8290 // CHECK: ret <8 x half> [[VECINIT7]]
// vmovq_n_f16: load the half through a pointer (see `load half` CHECK above),
// then splat it into all 8 lanes of the 128-bit vector.
float16x8_t test_vmovq_n_f16(float16_t *a) {
  return vmovq_n_f16(*a);
}
8294
8295 // CHECK-LABEL: @test_vmovq_n_f32(
8296 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
8297 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
8298 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
8299 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
8300 // CHECK: ret <4 x float> [[VECINIT3_I]]
// vmovq_n_f32: splat a float into all 4 lanes via insertelement chain.
float32x4_t test_vmovq_n_f32(float32_t a) {
  return vmovq_n_f32(a);
}
8304
8305 // CHECK-LABEL: @test_vmov_n_s64(
8306 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
8307 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
8308 // CHECK: ret <1 x i64> [[ADD_I]]
// vmov_n_s64: single-lane splat. The result is fed through vadd so the
// CHECK lines can pin a use of the splat (insertelement + add, per above).
int64x1_t test_vmov_n_s64(int64_t a) {
  int64x1_t tmp = vmov_n_s64(a);
  return vadd_s64(tmp, tmp);
}
8313
8314 // CHECK-LABEL: @test_vmov_n_u64(
8315 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
8316 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
8317 // CHECK: ret <1 x i64> [[ADD_I]]
// vmov_n_u64: unsigned single-lane splat, used via vadd as in the s64 case.
uint64x1_t test_vmov_n_u64(uint64_t a) {
  uint64x1_t tmp = vmov_n_u64(a);
  return vadd_u64(tmp, tmp);
}
8322
8323 // CHECK-LABEL: @test_vmovq_n_s64(
8324 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
8325 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
8326 // CHECK: ret <2 x i64> [[VECINIT1_I]]
// vmovq_n_s64: splat an i64 into both lanes via insertelement chain.
int64x2_t test_vmovq_n_s64(int64_t a) {
  return vmovq_n_s64(a);
}
8330
8331 // CHECK-LABEL: @test_vmovq_n_u64(
8332 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
8333 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
8334 // CHECK: ret <2 x i64> [[VECINIT1_I]]
// vmovq_n_u64: splat a u64 into both lanes via insertelement chain.
uint64x2_t test_vmovq_n_u64(uint64_t a) {
  return vmovq_n_u64(a);
}
8338
8339 // CHECK-LABEL: @test_vmul_s8(
8340 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
8341 // CHECK: ret <8 x i8> [[MUL_I]]
// vmul_s8: lane-wise multiply, lowered to a plain IR `mul` (per CHECK above).
int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
  return vmul_s8(a, b);
}
8345
8346 // CHECK-LABEL: @test_vmul_s16(
8347 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
8348 // CHECK: ret <4 x i16> [[MUL_I]]
// vmul_s16: lane-wise multiply, lowered to a plain IR `mul`.
int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
  return vmul_s16(a, b);
}
8352
8353 // CHECK-LABEL: @test_vmul_s32(
8354 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
8355 // CHECK: ret <2 x i32> [[MUL_I]]
// vmul_s32: lane-wise multiply, lowered to a plain IR `mul`.
int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
  return vmul_s32(a, b);
}
8359
8360 // CHECK-LABEL: @test_vmul_f32(
8361 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b
8362 // CHECK: ret <2 x float> [[MUL_I]]
// vmul_f32: lane-wise float multiply, lowered to IR `fmul` (per CHECK above).
float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
  return vmul_f32(a, b);
}
8366
8367 // CHECK-LABEL: @test_vmul_u8(
8368 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
8369 // CHECK: ret <8 x i8> [[MUL_I]]
// vmul_u8: same `mul` lowering as the signed variant (multiply is sign-agnostic).
uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
  return vmul_u8(a, b);
}
8373
8374 // CHECK-LABEL: @test_vmul_u16(
8375 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
8376 // CHECK: ret <4 x i16> [[MUL_I]]
// vmul_u16: lane-wise multiply, lowered to a plain IR `mul`.
uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_u16(a, b);
}
8380
8381 // CHECK-LABEL: @test_vmul_u32(
8382 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
8383 // CHECK: ret <2 x i32> [[MUL_I]]
// vmul_u32: lane-wise multiply, lowered to a plain IR `mul`.
uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_u32(a, b);
}
8387
8388 // CHECK-LABEL: @test_vmulq_s8(
8389 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
8390 // CHECK: ret <16 x i8> [[MUL_I]]
// vmulq_s8: 128-bit lane-wise multiply, lowered to a plain IR `mul`.
int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
  return vmulq_s8(a, b);
}
8394
8395 // CHECK-LABEL: @test_vmulq_s16(
8396 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
8397 // CHECK: ret <8 x i16> [[MUL_I]]
// vmulq_s16: 128-bit lane-wise multiply, lowered to a plain IR `mul`.
int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
  return vmulq_s16(a, b);
}
8401
8402 // CHECK-LABEL: @test_vmulq_s32(
8403 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
8404 // CHECK: ret <4 x i32> [[MUL_I]]
// vmulq_s32: 128-bit lane-wise multiply, lowered to a plain IR `mul`.
int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
  return vmulq_s32(a, b);
}
8408
8409 // CHECK-LABEL: @test_vmulq_f32(
8410 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b
8411 // CHECK: ret <4 x float> [[MUL_I]]
// vmulq_f32: 128-bit lane-wise float multiply, lowered to IR `fmul`.
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
  return vmulq_f32(a, b);
}
8415
8416 // CHECK-LABEL: @test_vmulq_u8(
8417 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
8418 // CHECK: ret <16 x i8> [[MUL_I]]
// vmulq_u8: 128-bit lane-wise multiply, lowered to a plain IR `mul`.
uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
  return vmulq_u8(a, b);
}
8422
8423 // CHECK-LABEL: @test_vmulq_u16(
8424 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
8425 // CHECK: ret <8 x i16> [[MUL_I]]
// vmulq_u16: 128-bit lane-wise multiply, lowered to a plain IR `mul`.
uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
  return vmulq_u16(a, b);
}
8429
8430 // CHECK-LABEL: @test_vmulq_u32(
8431 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
8432 // CHECK: ret <4 x i32> [[MUL_I]]
// vmulq_u32: 128-bit lane-wise multiply, lowered to a plain IR `mul`.
uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
  return vmulq_u32(a, b);
}
8436
8437 // CHECK-LABEL: @test_vmull_s8(
8438 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
8439 // CHECK: ret <8 x i16> [[VMULL_I]]
// vmull_s8: signed widening multiply — lowers to the llvm.arm.neon.vmulls
// intrinsic call (per CHECK above), producing double-width lanes.
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
8443
8444 // CHECK-LABEL: @test_vmull_s16(
8445 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8446 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8447 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
8448 // CHECK: ret <4 x i32> [[VMULL2_I]]
// vmull_s16: signed widening multiply via llvm.arm.neon.vmulls.v4i32.
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
8452
8453 // CHECK-LABEL: @test_vmull_s32(
8454 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8455 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8456 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
8457 // CHECK: ret <2 x i64> [[VMULL2_I]]
// vmull_s32: signed widening multiply via llvm.arm.neon.vmulls.v2i64.
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
8461
8462 // CHECK-LABEL: @test_vmull_u8(
8463 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
8464 // CHECK: ret <8 x i16> [[VMULL_I]]
// vmull_u8: unsigned widening multiply via llvm.arm.neon.vmullu.v8i16.
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
8468
8469 // CHECK-LABEL: @test_vmull_u16(
8470 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8471 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8472 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
8473 // CHECK: ret <4 x i32> [[VMULL2_I]]
// vmull_u16: unsigned widening multiply via llvm.arm.neon.vmullu.v4i32.
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
8477
8478 // CHECK-LABEL: @test_vmull_u32(
8479 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8480 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8481 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
8482 // CHECK: ret <2 x i64> [[VMULL2_I]]
// vmull_u32: unsigned widening multiply via llvm.arm.neon.vmullu.v2i64.
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}
8486
8487 // CHECK-LABEL: @test_vmull_p8(
8488 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
8489 // CHECK: ret <8 x i16> [[VMULL_I]]
// vmull_p8: polynomial widening multiply via llvm.arm.neon.vmullp.v8i16.
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}
8493
8494 // CHECK-LABEL: @test_vmull_lane_s16(
8495 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8496 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8497 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8498 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
8499 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
8500 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
8501 // CHECK: ret <4 x i32> [[VMULL2_I]]
// vmull_lane_s16: broadcast lane 3 of b (shufflevector, per CHECK above),
// then signed widening multiply. Lane index 3 is the highest valid lane.
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vmull_lane_s16(a, b, 3);
}
8505
8506 // CHECK-LABEL: @test_vmull_lane_s32(
8507 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8508 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8509 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8510 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
8511 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
8512 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
8513 // CHECK: ret <2 x i64> [[VMULL2_I]]
// vmull_lane_s32: broadcast lane 1 of b, then signed widening multiply.
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vmull_lane_s32(a, b, 1);
}
8517
8518 // CHECK-LABEL: @test_vmull_lane_u16(
8519 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8520 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8521 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8522 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
8523 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
8524 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
8525 // CHECK: ret <4 x i32> [[VMULL2_I]]
// vmull_lane_u16: broadcast lane 3 of b, then unsigned widening multiply.
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_lane_u16(a, b, 3);
}
8529
8530 // CHECK-LABEL: @test_vmull_lane_u32(
8531 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8532 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8533 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8534 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
8535 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
8536 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
8537 // CHECK: ret <2 x i64> [[VMULL2_I]]
// vmull_lane_u32: broadcast lane 1 of b, then unsigned widening multiply.
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_lane_u32(a, b, 1);
}
8541
8542 // CHECK-LABEL: @test_vmull_n_s16(
8543 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8544 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8545 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8546 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8547 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8548 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8549 // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8550 // CHECK: ret <4 x i32> [[VMULL5_I]]
// vmull_n_s16: splat scalar b into a vector (insertelement chain, per CHECK
// above), then signed widening multiply against a.
int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
  return vmull_n_s16(a, b);
}
8554
8555 // CHECK-LABEL: @test_vmull_n_s32(
8556 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8557 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8558 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8559 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8560 // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8561 // CHECK: ret <2 x i64> [[VMULL3_I]]
// vmull_n_s32: splat scalar b, then signed widening multiply against a.
int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
  return vmull_n_s32(a, b);
}
8565
8566 // CHECK-LABEL: @test_vmull_n_u16(
8567 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8568 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8569 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8570 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8571 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8572 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8573 // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8574 // CHECK: ret <4 x i32> [[VMULL5_I]]
// vmull_n_u16: splat scalar b, then unsigned widening multiply against a.
uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
  return vmull_n_u16(a, b);
}
8578
8579 // CHECK-LABEL: @test_vmull_n_u32(
8580 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8581 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8582 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8583 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8584 // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8585 // CHECK: ret <2 x i64> [[VMULL3_I]]
// vmull_n_u32: splat scalar b, then unsigned widening multiply against a.
uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
  return vmull_n_u32(a, b);
}
8589
8590 // CHECK-LABEL: @test_vmul_p8(
8591 // CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b)
8592 // CHECK: ret <8 x i8> [[VMUL_V_I]]
// vmul_p8: polynomial (carry-less) multiply — lowers to the
// llvm.arm.neon.vmulp intrinsic rather than a plain `mul` (per CHECK above).
poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
  return vmul_p8(a, b);
}
8596
8597 // CHECK-LABEL: @test_vmulq_p8(
8598 // CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b)
8599 // CHECK: ret <16 x i8> [[VMULQ_V_I]]
// vmulq_p8: 128-bit polynomial multiply via llvm.arm.neon.vmulp.v16i8.
poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
  return vmulq_p8(a, b);
}
8603
8604 // CHECK-LABEL: @test_vmul_lane_s16(
8605 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8606 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8607 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8608 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
8609 // CHECK: ret <4 x i16> [[MUL]]
// vmul_lane_s16: broadcast lane 3 of b (shufflevector, per CHECK above),
// then a plain lane-wise `mul` — no widening here.
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
  return vmul_lane_s16(a, b, 3);
}
8613
8614 // CHECK-LABEL: @test_vmul_lane_s32(
8615 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8616 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8617 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8618 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
8619 // CHECK: ret <2 x i32> [[MUL]]
test_vmul_lane_s32(int32x2_t a,int32x2_t b)8620 int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
8621 return vmul_lane_s32(a, b, 1);
8622 }
8623
8624 // CHECK-LABEL: @test_vmul_lane_f32(
8625 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
8626 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
8627 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
8628 // CHECK: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
8629 // CHECK: ret <2 x float> [[MUL]]
test_vmul_lane_f32(float32x2_t a,float32x2_t b)8630 float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
8631 return vmul_lane_f32(a, b, 1);
8632 }
8633
8634 // CHECK-LABEL: @test_vmul_lane_u16(
8635 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8636 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8637 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8638 // CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
8639 // CHECK: ret <4 x i16> [[MUL]]
test_vmul_lane_u16(uint16x4_t a,uint16x4_t b)8640 uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
8641 return vmul_lane_u16(a, b, 3);
8642 }
8643
8644 // CHECK-LABEL: @test_vmul_lane_u32(
8645 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8646 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8647 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8648 // CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
8649 // CHECK: ret <2 x i32> [[MUL]]
test_vmul_lane_u32(uint32x2_t a,uint32x2_t b)8650 uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
8651 return vmul_lane_u32(a, b, 1);
8652 }
8653
8654 // CHECK-LABEL: @test_vmulq_lane_s16(
8655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8656 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8657 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
8658 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
8659 // CHECK: ret <8 x i16> [[MUL]]
test_vmulq_lane_s16(int16x8_t a,int16x4_t b)8660 int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
8661 return vmulq_lane_s16(a, b, 3);
8662 }
8663
8664 // CHECK-LABEL: @test_vmulq_lane_s32(
8665 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8666 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8667 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8668 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
8669 // CHECK: ret <4 x i32> [[MUL]]
test_vmulq_lane_s32(int32x4_t a,int32x2_t b)8670 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
8671 return vmulq_lane_s32(a, b, 1);
8672 }
8673
8674 // CHECK-LABEL: @test_vmulq_lane_f32(
8675 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
8676 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
8677 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8678 // CHECK: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
8679 // CHECK: ret <4 x float> [[MUL]]
test_vmulq_lane_f32(float32x4_t a,float32x2_t b)8680 float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
8681 return vmulq_lane_f32(a, b, 1);
8682 }
8683
8684 // CHECK-LABEL: @test_vmulq_lane_u16(
8685 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8686 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8687 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
8688 // CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
8689 // CHECK: ret <8 x i16> [[MUL]]
test_vmulq_lane_u16(uint16x8_t a,uint16x4_t b)8690 uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
8691 return vmulq_lane_u16(a, b, 3);
8692 }
8693
8694 // CHECK-LABEL: @test_vmulq_lane_u32(
8695 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8696 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8697 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8698 // CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
8699 // CHECK: ret <4 x i32> [[MUL]]
test_vmulq_lane_u32(uint32x4_t a,uint32x2_t b)8700 uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
8701 return vmulq_lane_u32(a, b, 1);
8702 }
8703
8704 // CHECK-LABEL: @test_vmul_n_s16(
8705 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8706 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8707 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8708 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8709 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
8710 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_n_s16(int16x4_t a,int16_t b)8711 int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
8712 return vmul_n_s16(a, b);
8713 }
8714
8715 // CHECK-LABEL: @test_vmul_n_s32(
8716 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8717 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8718 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
8719 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_n_s32(int32x2_t a,int32_t b)8720 int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
8721 return vmul_n_s32(a, b);
8722 }
8723
8724 // CHECK-LABEL: @test_vmul_n_f32(
8725 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
8726 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
8727 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
8728 // CHECK: ret <2 x float> [[MUL_I]]
test_vmul_n_f32(float32x2_t a,float32_t b)8729 float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
8730 return vmul_n_f32(a, b);
8731 }
8732
8733 // CHECK-LABEL: @test_vmul_n_u16(
8734 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8735 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8736 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8737 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8738 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
8739 // CHECK: ret <4 x i16> [[MUL_I]]
test_vmul_n_u16(uint16x4_t a,uint16_t b)8740 uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
8741 return vmul_n_u16(a, b);
8742 }
8743
8744 // CHECK-LABEL: @test_vmul_n_u32(
8745 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8746 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8747 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
8748 // CHECK: ret <2 x i32> [[MUL_I]]
test_vmul_n_u32(uint32x2_t a,uint32_t b)8749 uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
8750 return vmul_n_u32(a, b);
8751 }
8752
8753 // CHECK-LABEL: @test_vmulq_n_s16(
8754 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
8755 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
8756 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
8757 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
8758 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
8759 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
8760 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
8761 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
8762 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
8763 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_n_s16(int16x8_t a,int16_t b)8764 int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
8765 return vmulq_n_s16(a, b);
8766 }
8767
8768 // CHECK-LABEL: @test_vmulq_n_s32(
8769 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
8770 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
8771 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
8772 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
8773 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
8774 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_n_s32(int32x4_t a,int32_t b)8775 int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
8776 return vmulq_n_s32(a, b);
8777 }
8778
8779 // CHECK-LABEL: @test_vmulq_n_f32(
8780 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
8781 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
8782 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
8783 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
8784 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
8785 // CHECK: ret <4 x float> [[MUL_I]]
test_vmulq_n_f32(float32x4_t a,float32_t b)8786 float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
8787 return vmulq_n_f32(a, b);
8788 }
8789
8790 // CHECK-LABEL: @test_vmulq_n_u16(
8791 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
8792 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
8793 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
8794 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
8795 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
8796 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
8797 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
8798 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
8799 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
8800 // CHECK: ret <8 x i16> [[MUL_I]]
test_vmulq_n_u16(uint16x8_t a,uint16_t b)8801 uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
8802 return vmulq_n_u16(a, b);
8803 }
8804
8805 // CHECK-LABEL: @test_vmulq_n_u32(
8806 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
8807 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
8808 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
8809 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
8810 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
8811 // CHECK: ret <4 x i32> [[MUL_I]]
test_vmulq_n_u32(uint32x4_t a,uint32_t b)8812 uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
8813 return vmulq_n_u32(a, b);
8814 }
8815
8816 // CHECK-LABEL: @test_vmvn_s8(
8817 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8818 // CHECK: ret <8 x i8> [[NEG_I]]
test_vmvn_s8(int8x8_t a)8819 int8x8_t test_vmvn_s8(int8x8_t a) {
8820 return vmvn_s8(a);
8821 }
8822
8823 // CHECK-LABEL: @test_vmvn_s16(
8824 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
8825 // CHECK: ret <4 x i16> [[NEG_I]]
test_vmvn_s16(int16x4_t a)8826 int16x4_t test_vmvn_s16(int16x4_t a) {
8827 return vmvn_s16(a);
8828 }
8829
8830 // CHECK-LABEL: @test_vmvn_s32(
8831 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
8832 // CHECK: ret <2 x i32> [[NEG_I]]
test_vmvn_s32(int32x2_t a)8833 int32x2_t test_vmvn_s32(int32x2_t a) {
8834 return vmvn_s32(a);
8835 }
8836
8837 // CHECK-LABEL: @test_vmvn_u8(
8838 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8839 // CHECK: ret <8 x i8> [[NEG_I]]
test_vmvn_u8(uint8x8_t a)8840 uint8x8_t test_vmvn_u8(uint8x8_t a) {
8841 return vmvn_u8(a);
8842 }
8843
8844 // CHECK-LABEL: @test_vmvn_u16(
8845 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
8846 // CHECK: ret <4 x i16> [[NEG_I]]
test_vmvn_u16(uint16x4_t a)8847 uint16x4_t test_vmvn_u16(uint16x4_t a) {
8848 return vmvn_u16(a);
8849 }
8850
8851 // CHECK-LABEL: @test_vmvn_u32(
8852 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
8853 // CHECK: ret <2 x i32> [[NEG_I]]
test_vmvn_u32(uint32x2_t a)8854 uint32x2_t test_vmvn_u32(uint32x2_t a) {
8855 return vmvn_u32(a);
8856 }
8857
8858 // CHECK-LABEL: @test_vmvn_p8(
8859 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8860 // CHECK: ret <8 x i8> [[NEG_I]]
test_vmvn_p8(poly8x8_t a)8861 poly8x8_t test_vmvn_p8(poly8x8_t a) {
8862 return vmvn_p8(a);
8863 }
8864
8865 // CHECK-LABEL: @test_vmvnq_s8(
8866 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8867 // CHECK: ret <16 x i8> [[NEG_I]]
test_vmvnq_s8(int8x16_t a)8868 int8x16_t test_vmvnq_s8(int8x16_t a) {
8869 return vmvnq_s8(a);
8870 }
8871
8872 // CHECK-LABEL: @test_vmvnq_s16(
8873 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
8874 // CHECK: ret <8 x i16> [[NEG_I]]
test_vmvnq_s16(int16x8_t a)8875 int16x8_t test_vmvnq_s16(int16x8_t a) {
8876 return vmvnq_s16(a);
8877 }
8878
8879 // CHECK-LABEL: @test_vmvnq_s32(
8880 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
8881 // CHECK: ret <4 x i32> [[NEG_I]]
test_vmvnq_s32(int32x4_t a)8882 int32x4_t test_vmvnq_s32(int32x4_t a) {
8883 return vmvnq_s32(a);
8884 }
8885
8886 // CHECK-LABEL: @test_vmvnq_u8(
8887 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8888 // CHECK: ret <16 x i8> [[NEG_I]]
test_vmvnq_u8(uint8x16_t a)8889 uint8x16_t test_vmvnq_u8(uint8x16_t a) {
8890 return vmvnq_u8(a);
8891 }
8892
8893 // CHECK-LABEL: @test_vmvnq_u16(
8894 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
8895 // CHECK: ret <8 x i16> [[NEG_I]]
test_vmvnq_u16(uint16x8_t a)8896 uint16x8_t test_vmvnq_u16(uint16x8_t a) {
8897 return vmvnq_u16(a);
8898 }
8899
8900 // CHECK-LABEL: @test_vmvnq_u32(
8901 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
8902 // CHECK: ret <4 x i32> [[NEG_I]]
test_vmvnq_u32(uint32x4_t a)8903 uint32x4_t test_vmvnq_u32(uint32x4_t a) {
8904 return vmvnq_u32(a);
8905 }
8906
8907 // CHECK-LABEL: @test_vmvnq_p8(
8908 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8909 // CHECK: ret <16 x i8> [[NEG_I]]
test_vmvnq_p8(poly8x16_t a)8910 poly8x16_t test_vmvnq_p8(poly8x16_t a) {
8911 return vmvnq_p8(a);
8912 }
8913
8914 // CHECK-LABEL: @test_vneg_s8(
8915 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
8916 // CHECK: ret <8 x i8> [[SUB_I]]
test_vneg_s8(int8x8_t a)8917 int8x8_t test_vneg_s8(int8x8_t a) {
8918 return vneg_s8(a);
8919 }
8920
8921 // CHECK-LABEL: @test_vneg_s16(
8922 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
8923 // CHECK: ret <4 x i16> [[SUB_I]]
test_vneg_s16(int16x4_t a)8924 int16x4_t test_vneg_s16(int16x4_t a) {
8925 return vneg_s16(a);
8926 }
8927
8928 // CHECK-LABEL: @test_vneg_s32(
8929 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
8930 // CHECK: ret <2 x i32> [[SUB_I]]
test_vneg_s32(int32x2_t a)8931 int32x2_t test_vneg_s32(int32x2_t a) {
8932 return vneg_s32(a);
8933 }
8934
8935 // CHECK-LABEL: @test_vneg_f32(
8936 // CHECK: [[SUB_I:%.*]] = fneg <2 x float> %a
8937 // CHECK: ret <2 x float> [[SUB_I]]
test_vneg_f32(float32x2_t a)8938 float32x2_t test_vneg_f32(float32x2_t a) {
8939 return vneg_f32(a);
8940 }
8941
8942 // CHECK-LABEL: @test_vnegq_s8(
8943 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
8944 // CHECK: ret <16 x i8> [[SUB_I]]
test_vnegq_s8(int8x16_t a)8945 int8x16_t test_vnegq_s8(int8x16_t a) {
8946 return vnegq_s8(a);
8947 }
8948
8949 // CHECK-LABEL: @test_vnegq_s16(
8950 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
8951 // CHECK: ret <8 x i16> [[SUB_I]]
test_vnegq_s16(int16x8_t a)8952 int16x8_t test_vnegq_s16(int16x8_t a) {
8953 return vnegq_s16(a);
8954 }
8955
8956 // CHECK-LABEL: @test_vnegq_s32(
8957 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
8958 // CHECK: ret <4 x i32> [[SUB_I]]
test_vnegq_s32(int32x4_t a)8959 int32x4_t test_vnegq_s32(int32x4_t a) {
8960 return vnegq_s32(a);
8961 }
8962
8963 // CHECK-LABEL: @test_vnegq_f32(
8964 // CHECK: [[SUB_I:%.*]] = fneg <4 x float> %a
8965 // CHECK: ret <4 x float> [[SUB_I]]
test_vnegq_f32(float32x4_t a)8966 float32x4_t test_vnegq_f32(float32x4_t a) {
8967 return vnegq_f32(a);
8968 }
8969
8970 // CHECK-LABEL: @test_vorn_s8(
8971 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8972 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
8973 // CHECK: ret <8 x i8> [[OR_I]]
test_vorn_s8(int8x8_t a,int8x8_t b)8974 int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
8975 return vorn_s8(a, b);
8976 }
8977
8978 // CHECK-LABEL: @test_vorn_s16(
8979 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
8980 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
8981 // CHECK: ret <4 x i16> [[OR_I]]
test_vorn_s16(int16x4_t a,int16x4_t b)8982 int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
8983 return vorn_s16(a, b);
8984 }
8985
8986 // CHECK-LABEL: @test_vorn_s32(
8987 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
8988 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
8989 // CHECK: ret <2 x i32> [[OR_I]]
test_vorn_s32(int32x2_t a,int32x2_t b)8990 int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
8991 return vorn_s32(a, b);
8992 }
8993
8994 // CHECK-LABEL: @test_vorn_s64(
8995 // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
8996 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
8997 // CHECK: ret <1 x i64> [[OR_I]]
test_vorn_s64(int64x1_t a,int64x1_t b)8998 int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
8999 return vorn_s64(a, b);
9000 }
9001
9002 // CHECK-LABEL: @test_vorn_u8(
9003 // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9004 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
9005 // CHECK: ret <8 x i8> [[OR_I]]
test_vorn_u8(uint8x8_t a,uint8x8_t b)9006 uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
9007 return vorn_u8(a, b);
9008 }
9009
9010 // CHECK-LABEL: @test_vorn_u16(
9011 // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
9012 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
9013 // CHECK: ret <4 x i16> [[OR_I]]
test_vorn_u16(uint16x4_t a,uint16x4_t b)9014 uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
9015 return vorn_u16(a, b);
9016 }
9017
9018 // CHECK-LABEL: @test_vorn_u32(
9019 // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
9020 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
9021 // CHECK: ret <2 x i32> [[OR_I]]
test_vorn_u32(uint32x2_t a,uint32x2_t b)9022 uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
9023 return vorn_u32(a, b);
9024 }
9025
9026 // CHECK-LABEL: @test_vorn_u64(
9027 // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
9028 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
9029 // CHECK: ret <1 x i64> [[OR_I]]
test_vorn_u64(uint64x1_t a,uint64x1_t b)9030 uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
9031 return vorn_u64(a, b);
9032 }
9033
9034 // CHECK-LABEL: @test_vornq_s8(
9035 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9036 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
9037 // CHECK: ret <16 x i8> [[OR_I]]
test_vornq_s8(int8x16_t a,int8x16_t b)9038 int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
9039 return vornq_s8(a, b);
9040 }
9041
9042 // CHECK-LABEL: @test_vornq_s16(
9043 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
9044 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
9045 // CHECK: ret <8 x i16> [[OR_I]]
test_vornq_s16(int16x8_t a,int16x8_t b)9046 int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
9047 return vornq_s16(a, b);
9048 }
9049
9050 // CHECK-LABEL: @test_vornq_s32(
9051 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
9052 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
9053 // CHECK: ret <4 x i32> [[OR_I]]
test_vornq_s32(int32x4_t a,int32x4_t b)9054 int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
9055 return vornq_s32(a, b);
9056 }
9057
9058 // CHECK-LABEL: @test_vornq_s64(
9059 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
9060 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
9061 // CHECK: ret <2 x i64> [[OR_I]]
test_vornq_s64(int64x2_t a,int64x2_t b)9062 int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
9063 return vornq_s64(a, b);
9064 }
9065
9066 // CHECK-LABEL: @test_vornq_u8(
9067 // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9068 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
9069 // CHECK: ret <16 x i8> [[OR_I]]
test_vornq_u8(uint8x16_t a,uint8x16_t b)9070 uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
9071 return vornq_u8(a, b);
9072 }
9073
9074 // CHECK-LABEL: @test_vornq_u16(
9075 // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
9076 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
9077 // CHECK: ret <8 x i16> [[OR_I]]
test_vornq_u16(uint16x8_t a,uint16x8_t b)9078 uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
9079 return vornq_u16(a, b);
9080 }
9081
9082 // CHECK-LABEL: @test_vornq_u32(
9083 // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
9084 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
9085 // CHECK: ret <4 x i32> [[OR_I]]
test_vornq_u32(uint32x4_t a,uint32x4_t b)9086 uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
9087 return vornq_u32(a, b);
9088 }
9089
9090 // CHECK-LABEL: @test_vornq_u64(
9091 // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
9092 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
9093 // CHECK: ret <2 x i64> [[OR_I]]
test_vornq_u64(uint64x2_t a,uint64x2_t b)9094 uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
9095 return vornq_u64(a, b);
9096 }
9097
9098 // CHECK-LABEL: @test_vorr_s8(
9099 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
9100 // CHECK: ret <8 x i8> [[OR_I]]
test_vorr_s8(int8x8_t a,int8x8_t b)9101 int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
9102 return vorr_s8(a, b);
9103 }
9104
9105 // CHECK-LABEL: @test_vorr_s16(
9106 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
9107 // CHECK: ret <4 x i16> [[OR_I]]
test_vorr_s16(int16x4_t a,int16x4_t b)9108 int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
9109 return vorr_s16(a, b);
9110 }
9111
9112 // CHECK-LABEL: @test_vorr_s32(
9113 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
9114 // CHECK: ret <2 x i32> [[OR_I]]
test_vorr_s32(int32x2_t a,int32x2_t b)9115 int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
9116 return vorr_s32(a, b);
9117 }
9118
9119 // CHECK-LABEL: @test_vorr_s64(
9120 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
9121 // CHECK: ret <1 x i64> [[OR_I]]
test_vorr_s64(int64x1_t a,int64x1_t b)9122 int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
9123 return vorr_s64(a, b);
9124 }
9125
9126 // CHECK-LABEL: @test_vorr_u8(
9127 // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
9128 // CHECK: ret <8 x i8> [[OR_I]]
test_vorr_u8(uint8x8_t a,uint8x8_t b)9129 uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
9130 return vorr_u8(a, b);
9131 }
9132
9133 // CHECK-LABEL: @test_vorr_u16(
9134 // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
9135 // CHECK: ret <4 x i16> [[OR_I]]
test_vorr_u16(uint16x4_t a,uint16x4_t b)9136 uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
9137 return vorr_u16(a, b);
9138 }
9139
9140 // CHECK-LABEL: @test_vorr_u32(
9141 // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
9142 // CHECK: ret <2 x i32> [[OR_I]]
test_vorr_u32(uint32x2_t a,uint32x2_t b)9143 uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
9144 return vorr_u32(a, b);
9145 }
9146
9147 // CHECK-LABEL: @test_vorr_u64(
9148 // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
9149 // CHECK: ret <1 x i64> [[OR_I]]
test_vorr_u64(uint64x1_t a,uint64x1_t b)9150 uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
9151 return vorr_u64(a, b);
9152 }
9153
9154 // CHECK-LABEL: @test_vorrq_s8(
9155 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
9156 // CHECK: ret <16 x i8> [[OR_I]]
test_vorrq_s8(int8x16_t a,int8x16_t b)9157 int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
9158 return vorrq_s8(a, b);
9159 }
9160
9161 // CHECK-LABEL: @test_vorrq_s16(
9162 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
9163 // CHECK: ret <8 x i16> [[OR_I]]
test_vorrq_s16(int16x8_t a,int16x8_t b)9164 int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
9165 return vorrq_s16(a, b);
9166 }
9167
9168 // CHECK-LABEL: @test_vorrq_s32(
9169 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
9170 // CHECK: ret <4 x i32> [[OR_I]]
test_vorrq_s32(int32x4_t a,int32x4_t b)9171 int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
9172 return vorrq_s32(a, b);
9173 }
9174
9175 // CHECK-LABEL: @test_vorrq_s64(
9176 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
9177 // CHECK: ret <2 x i64> [[OR_I]]
test_vorrq_s64(int64x2_t a,int64x2_t b)9178 int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
9179 return vorrq_s64(a, b);
9180 }
9181
9182 // CHECK-LABEL: @test_vorrq_u8(
9183 // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
9184 // CHECK: ret <16 x i8> [[OR_I]]
test_vorrq_u8(uint8x16_t a,uint8x16_t b)9185 uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
9186 return vorrq_u8(a, b);
9187 }
9188
9189 // CHECK-LABEL: @test_vorrq_u16(
9190 // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
9191 // CHECK: ret <8 x i16> [[OR_I]]
test_vorrq_u16(uint16x8_t a,uint16x8_t b)9192 uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
9193 return vorrq_u16(a, b);
9194 }
9195
9196 // CHECK-LABEL: @test_vorrq_u32(
9197 // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
9198 // CHECK: ret <4 x i32> [[OR_I]]
test_vorrq_u32(uint32x4_t a,uint32x4_t b)9199 uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
9200 return vorrq_u32(a, b);
9201 }
9202
9203 // CHECK-LABEL: @test_vorrq_u64(
9204 // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
9205 // CHECK: ret <2 x i64> [[OR_I]]
test_vorrq_u64(uint64x2_t a,uint64x2_t b)9206 uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
9207 return vorrq_u64(a, b);
9208 }
9209
9210 // CHECK-LABEL: @test_vpadal_s8(
9211 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
9212 // CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
9213 // CHECK: ret <4 x i16> [[VPADAL_V1_I]]
test_vpadal_s8(int16x4_t a,int8x8_t b)9214 int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
9215 return vpadal_s8(a, b);
9216 }
9217
9218 // CHECK-LABEL: @test_vpadal_s16(
9219 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
9220 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9221 // CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
9222 // CHECK: ret <2 x i32> [[VPADAL_V2_I]]
test_vpadal_s16(int32x2_t a,int16x4_t b)9223 int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
9224 return vpadal_s16(a, b);
9225 }
9226
9227 // CHECK-LABEL: @test_vpadal_s32(
9228 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
9229 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9230 // CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
9231 // CHECK: ret <1 x i64> [[VPADAL_V2_I]]
test_vpadal_s32(int64x1_t a,int32x2_t b)9232 int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
9233 return vpadal_s32(a, b);
9234 }
9235
9236 // CHECK-LABEL: @test_vpadal_u8(
9237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
9238 // CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
9239 // CHECK: ret <4 x i16> [[VPADAL_V1_I]]
test_vpadal_u8(uint16x4_t a,uint8x8_t b)9240 uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
9241 return vpadal_u8(a, b);
9242 }
9243
9244 // CHECK-LABEL: @test_vpadal_u16(
9245 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
9246 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9247 // CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
9248 // CHECK: ret <2 x i32> [[VPADAL_V2_I]]
test_vpadal_u16(uint32x2_t a,uint16x4_t b)9249 uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
9250 return vpadal_u16(a, b);
9251 }
9252
9253 // CHECK-LABEL: @test_vpadal_u32(
9254 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
9255 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9256 // CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
9257 // CHECK: ret <1 x i64> [[VPADAL_V2_I]]
test_vpadal_u32(uint64x1_t a,uint32x2_t b)9258 uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
9259 return vpadal_u32(a, b);
9260 }
9261
// vpadalq_*: pairwise add-accumulate long, Q-register variants.
// Each test pins the IR Clang emits for the intrinsic: a single call to the
// signed (vpadals) or unsigned (vpadalu) llvm.arm.neon intrinsic.
// CHECK-LABEL: @test_vpadalq_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: @test_vpadalq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: @test_vpadalq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: @test_vpadalq_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: @test_vpadalq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: @test_vpadalq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}
9313
// vpadd_*: pairwise add of adjacent vector elements (D-register only).
// Signed, unsigned, and float lanes all lower to llvm.arm.neon.vpadd; the
// dead bitcasts are leftovers of the generic builtin lowering after mem2reg.
// CHECK-LABEL: @test_vpadd_s8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}
9377
// vpaddl_*: pairwise add long (widening), D-register variants. Element width
// doubles: i8->i16, i16->i32, i32->i64; vpaddls is signed, vpaddlu unsigned.
// CHECK-LABEL: @test_vpaddl_s8(
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a)
// CHECK: ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: @test_vpaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a)
// CHECK: ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: @test_vpaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a)
// CHECK: ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: @test_vpaddl_u8(
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a)
// CHECK: ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: @test_vpaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a)
// CHECK: ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: @test_vpaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a)
// CHECK: ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}
9423
// vpaddlq_*: pairwise add long (widening), Q-register variants.
// CHECK-LABEL: @test_vpaddlq_s8(
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a)
// CHECK: ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: @test_vpaddlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a)
// CHECK: ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: @test_vpaddlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a)
// CHECK: ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: @test_vpaddlq_u8(
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a)
// CHECK: ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: @test_vpaddlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a)
// CHECK: ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: @test_vpaddlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a)
// CHECK: ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}
9469
// vpmax_*: pairwise maximum of adjacent elements (D-register only).
// vpmaxs handles signed int and float lanes, vpmaxu unsigned lanes.
// CHECK-LABEL: @test_vpmax_s8(
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPMAX_V2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}
9533
// vpmin_*: pairwise minimum of adjacent elements (D-register only).
// vpmins handles signed int and float lanes, vpminu unsigned lanes.
// CHECK-LABEL: @test_vpmin_s8(
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPMIN_V2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}
9597
// vqabs_*/vqabsq_*: saturating absolute value (signed only), D and Q forms.
// CHECK-LABEL: @test_vqabs_s8(
// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: @test_vqabs_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQABS_V1_I]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: @test_vqabs_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQABS_V1_I]]
int32x2_t test_vqabs_s32(int32x2_t a) {
  return vqabs_s32(a);
}

// CHECK-LABEL: @test_vqabsq_s8(
// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: @test_vqabsq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a)
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQABSQ_V1_I]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}

// CHECK-LABEL: @test_vqabsq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a)
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQABSQ_V1_I]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
  return vqabsq_s32(a);
}
9647
// vqadd_*/vqaddq_*: saturating add, D and Q forms. These lower to the
// target-independent llvm.sadd.sat / llvm.uadd.sat intrinsics rather than
// ARM-specific ones.
// CHECK-LABEL: @test_vqadd_s8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddq_s8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
9795
// vqdmlal_*: saturating doubling multiply-accumulate long. Lowered as
// vqdmull followed by a saturating add (llvm.sadd.sat). The _lane variants
// splat one lane of the third operand; the _n variants splat a scalar.
// CHECK-LABEL: @test_vqdmlal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) #8
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vqdmlal_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) #8
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vqdmlal_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlal_n_s32(a, b, c);
}
9873
9874 // CHECK-LABEL: @test_vqdmlsl_s16(
9875 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9876 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9877 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9878 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
9879 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
9880 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
test_vqdmlsl_s16(int32x4_t a,int16x4_t b,int16x4_t c)9881 int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9882 return vqdmlsl_s16(a, b, c);
9883 }
9884
9885 // CHECK-LABEL: @test_vqdmlsl_s32(
9886 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9887 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9888 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9889 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
9890 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
9891 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
test_vqdmlsl_s32(int64x2_t a,int32x2_t b,int32x2_t c)9892 int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9893 return vqdmlsl_s32(a, b, c);
9894 }
9895
// CHECK-LABEL: @test_vqdmlsl_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) #8
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
// Lane 3 of c is splatted via shufflevector before the vqdmull/ssub.sat pair.
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_lane_s16(a, b, c, 3);
}
9909
// CHECK-LABEL: @test_vqdmlsl_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) #8
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
// Lane 1 of c is splatted via shufflevector before the vqdmull/ssub.sat pair.
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_lane_s32(a, b, c, 1);
}
9923
// CHECK-LABEL: @test_vqdmlsl_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK: ret <4 x i32> [[VQDMLSL_V6_I]]
// Scalar c is broadcast element-by-element (insertelement chain), then
// fed to the same vqdmull/ssub.sat sequence as the vector form.
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlsl_n_s16(a, b, c);
}
9938
// CHECK-LABEL: @test_vqdmlsl_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK: ret <2 x i64> [[VQDMLSL_V4_I]]
// Scalar c is broadcast (insertelement chain) before vqdmull/ssub.sat.
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlsl_n_s32(a, b, c);
}
9951
// CHECK-LABEL: @test_vqdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
// vqdmulh_s16 lowers to @llvm.arm.neon.vqdmulh.v4i16.
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}
9961
// CHECK-LABEL: @test_vqdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
// vqdmulh_s32 lowers to @llvm.arm.neon.vqdmulh.v2i32.
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}
9971
// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
// Quad-register variant: lowers to @llvm.arm.neon.vqdmulh.v8i16.
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}
9981
// CHECK-LABEL: @test_vqdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
// Quad-register variant: lowers to @llvm.arm.neon.vqdmulh.v4i32.
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}
9991
// CHECK-LABEL: @test_vqdmulh_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
// Lane 3 of b is splatted via shufflevector, then fed to vqdmulh.v4i16.
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_lane_s16(a, b, 3);
}
10004
// CHECK-LABEL: @test_vqdmulh_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
// Lane 1 of b is splatted via shufflevector, then fed to vqdmulh.v2i32.
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_lane_s32(a, b, 1);
}
10017
// CHECK-LABEL: @test_vqdmulhq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]]) #8
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
// Lane 3 of the 64-bit b is widened to an 8-lane splat for the q-form.
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqdmulhq_lane_s16(a, b, 3);
}
10030
// CHECK-LABEL: @test_vqdmulhq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]]) #8
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
// Lane 1 of the 64-bit b is widened to a 4-lane splat for the q-form.
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqdmulhq_lane_s32(a, b, 1);
}
10043
// CHECK-LABEL: @test_vqdmulh_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V5_I]]
// Scalar b is broadcast (insertelement chain) before vqdmulh.v4i16.
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqdmulh_n_s16(a, b);
}
10057
// CHECK-LABEL: @test_vqdmulh_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V3_I]]
// Scalar b is broadcast (insertelement chain) before vqdmulh.v2i32.
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqdmulh_n_s32(a, b);
}
10069
// CHECK-LABEL: @test_vqdmulhq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]]
// Scalar b is broadcast to all 8 lanes before vqdmulh.v8i16.
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqdmulhq_n_s16(a, b);
}
10087
// CHECK-LABEL: @test_vqdmulhq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]]
// Scalar b is broadcast to all 4 lanes before vqdmulh.v4i32.
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqdmulhq_n_s32(a, b);
}
10101
// CHECK-LABEL: @test_vqdmull_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
// vqdmull_s16 lowers to the widening @llvm.arm.neon.vqdmull.v4i32.
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
10111
// CHECK-LABEL: @test_vqdmull_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
// vqdmull_s32 lowers to the widening @llvm.arm.neon.vqdmull.v2i64.
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}
10121
// CHECK-LABEL: @test_vqdmull_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
// Lane 3 of b is splatted via shufflevector, then fed to vqdmull.v4i32.
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_lane_s16(a, b, 3);
}
10134
// CHECK-LABEL: @test_vqdmull_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
// Lane 1 of b is splatted via shufflevector, then fed to vqdmull.v2i64.
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_lane_s32(a, b, 1);
}
10147
// CHECK-LABEL: @test_vqdmull_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V5_I]]
// Scalar b is broadcast (insertelement chain) before vqdmull.v4i32.
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
  return vqdmull_n_s16(a, b);
}
10161
// CHECK-LABEL: @test_vqdmull_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V3_I]]
// Scalar b is broadcast (insertelement chain) before vqdmull.v2i64.
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
  return vqdmull_n_s32(a, b);
}
10173
// CHECK-LABEL: @test_vqmovn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a)
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
// Signed saturating narrow: lowers to @llvm.arm.neon.vqmovns.v8i8.
int8x8_t test_vqmovn_s16(int16x8_t a) {
  return vqmovn_s16(a);
}
10181
// CHECK-LABEL: @test_vqmovn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
// Signed saturating narrow: lowers to @llvm.arm.neon.vqmovns.v4i16.
int16x4_t test_vqmovn_s32(int32x4_t a) {
  return vqmovn_s32(a);
}
10190
// CHECK-LABEL: @test_vqmovn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
// Signed saturating narrow: lowers to @llvm.arm.neon.vqmovns.v2i32.
int32x2_t test_vqmovn_s64(int64x2_t a) {
  return vqmovn_s64(a);
}
10199
// CHECK-LABEL: @test_vqmovn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a)
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
// Unsigned saturating narrow: lowers to @llvm.arm.neon.vqmovnu.v8i8.
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
  return vqmovn_u16(a);
}
10207
// CHECK-LABEL: @test_vqmovn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
// Unsigned saturating narrow: lowers to @llvm.arm.neon.vqmovnu.v4i16.
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
  return vqmovn_u32(a);
}
10216
// CHECK-LABEL: @test_vqmovn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
// Unsigned saturating narrow: lowers to @llvm.arm.neon.vqmovnu.v2i32.
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
  return vqmovn_u64(a);
}
10225
// CHECK-LABEL: @test_vqmovun_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a)
// CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
// Signed-to-unsigned saturating narrow: lowers to vqmovnsu.v8i8.
uint8x8_t test_vqmovun_s16(int16x8_t a) {
  return vqmovun_s16(a);
}
10233
// CHECK-LABEL: @test_vqmovun_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a)
// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQMOVUN_V1_I]]
// Signed-to-unsigned saturating narrow: lowers to vqmovnsu.v4i16.
uint16x4_t test_vqmovun_s32(int32x4_t a) {
  return vqmovun_s32(a);
}
10242
// CHECK-LABEL: @test_vqmovun_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a)
// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQMOVUN_V1_I]]
// Signed-to-unsigned saturating narrow: lowers to vqmovnsu.v2i32.
uint32x2_t test_vqmovun_s64(int64x2_t a) {
  return vqmovun_s64(a);
}
10251
// CHECK-LABEL: @test_vqneg_s8(
// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VQNEG_V_I]]
// Saturating negate: lowers to @llvm.arm.neon.vqneg.v8i8.
int8x8_t test_vqneg_s8(int8x8_t a) {
  return vqneg_s8(a);
}
10258
// CHECK-LABEL: @test_vqneg_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a)
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQNEG_V1_I]]
// Saturating negate: lowers to @llvm.arm.neon.vqneg.v4i16.
int16x4_t test_vqneg_s16(int16x4_t a) {
  return vqneg_s16(a);
}
10267
// CHECK-LABEL: @test_vqneg_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a)
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQNEG_V1_I]]
// Saturating negate: lowers to @llvm.arm.neon.vqneg.v2i32.
int32x2_t test_vqneg_s32(int32x2_t a) {
  return vqneg_s32(a);
}
10276
// CHECK-LABEL: @test_vqnegq_s8(
// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
// Quad-register saturating negate: lowers to vqneg.v16i8.
int8x16_t test_vqnegq_s8(int8x16_t a) {
  return vqnegq_s8(a);
}
10283
// CHECK-LABEL: @test_vqnegq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a)
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQNEGQ_V1_I]]
// Quad-register saturating negate: lowers to vqneg.v8i16.
int16x8_t test_vqnegq_s16(int16x8_t a) {
  return vqnegq_s16(a);
}
10292
// CHECK-LABEL: @test_vqnegq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a)
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQNEGQ_V1_I]]
// Quad-register saturating negate: lowers to vqneg.v4i32.
int32x4_t test_vqnegq_s32(int32x4_t a) {
  return vqnegq_s32(a);
}
10301
// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
// Rounding variant: lowers to @llvm.arm.neon.vqrdmulh.v4i16.
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}
10311
// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
// Rounding variant: lowers to @llvm.arm.neon.vqrdmulh.v2i32.
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}
10321
// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
// Quad-register rounding variant: lowers to vqrdmulh.v8i16.
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}
10331
// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
// Quad-register rounding variant: lowers to vqrdmulh.v4i32.
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}
10341
// CHECK-LABEL: @test_vqrdmulh_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
// Lane 3 of b is splatted via shufflevector, then fed to vqrdmulh.v4i16.
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_lane_s16(a, b, 3);
}
10354
// CHECK-LABEL: @test_vqrdmulh_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
// Lane 1 of b is splatted via shufflevector, then fed to vqrdmulh.v2i32.
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_lane_s32(a, b, 1);
}
10367
// CHECK-LABEL: @test_vqrdmulhq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]]) #8
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
// Lane 3 of the 64-bit b is widened to an 8-lane splat for the q-form.
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqrdmulhq_lane_s16(a, b, 3);
}
10380
// CHECK-LABEL: @test_vqrdmulhq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]]) #8
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
// Lane 1 of the 64-bit b is widened to a 4-lane splat for the q-form.
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqrdmulhq_lane_s32(a, b, 1);
}
10393
// CHECK-LABEL: @test_vqrdmulh_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V5_I]]
// Scalar b is broadcast (insertelement chain) before vqrdmulh.v4i16.
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqrdmulh_n_s16(a, b);
}
10407
// CHECK-LABEL: @test_vqrdmulh_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V3_I]]
// Scalar b is broadcast (insertelement chain) before vqrdmulh.v2i32.
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqrdmulh_n_s32(a, b);
}
10419
// CHECK-LABEL: @test_vqrdmulhq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]]
// Scalar b is broadcast to all 8 lanes before vqrdmulh.v8i16.
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqrdmulhq_n_s16(a, b);
}
10437
10438 // CHECK-LABEL: @test_vqrdmulhq_n_s32(
10439 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
10440 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
10441 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
10442 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
10443 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10444 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
10445 // CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
10446 // CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
10447 // CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]]
test_vqrdmulhq_n_s32(int32x4_t a,int32_t b)10448 int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
10449 return vqrdmulhq_n_s32(a, b);
10450 }
10451
// vqrshl / vqrshlq: saturating rounding shift-left by a per-lane signed shift
// vector. Signed element types lower to @llvm.arm.neon.vqrshifts.*, unsigned
// ones to @llvm.arm.neon.vqrshiftu.*; the shift operand is signed in both
// cases. NOTE(review): CHECK lines are generated output — do not hand-edit.
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
10599
// vqrshrn_n: saturating rounding narrowing shift-right by immediate. Note the
// C-level shift amount 1 is materialized in the IR as a splat of -1: the
// vqrshiftn* intrinsics encode right shifts as negative shift amounts.
// NOTE(review): CHECK lines are generated output — do not hand-edit.
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}
10653
// vqrshrun_n: signed input, unsigned-saturating rounding narrowing shift-right
// by immediate (vqrshiftnsu). As above, shift 1 appears as a splat of -1.
// NOTE(review): CHECK lines are generated output — do not hand-edit.
// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}
10680
// vqshl / vqshlq: saturating (non-rounding) shift-left by a per-lane signed
// shift vector. Signed lowers to @llvm.arm.neon.vqshifts.*, unsigned to
// @llvm.arm.neon.vqshiftu.*. Mirrors the vqrshl block above minus rounding.
// NOTE(review): CHECK lines are generated output — do not hand-edit.
// CHECK-LABEL: @test_vqshl_s8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: @test_vqshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: @test_vqshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: @test_vqshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: @test_vqshl_u8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: @test_vqshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: @test_vqshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: @test_vqshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: @test_vqshlq_s8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
10828
// vqshlu_n / vqshluq_n: signed input, unsigned-saturating shift-left by
// immediate (vqshiftsu). The immediate 1 is splatted as a constant vector.
// NOTE(review): CHECK lines are generated output — do not hand-edit.
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 1);
}
10896
// vqshl_n / vqshlq_n: saturating shift-left by immediate. Same vqshifts/
// vqshiftu intrinsics as the by-vector forms above, but the shift operand is
// the splatted constant 1 (positive = left shift).
// NOTE(review): CHECK lines are generated output — do not hand-edit.
// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 1);
}
11032
// vqshrn_n: saturating shift-right-narrow by immediate. The shift amount is
// encoded as a splat of negative values in the second operand of
// llvm.arm.neon.vqshiftns/vqshiftnu (negative = right shift by that amount).
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 1);
}
11086
// vqshrun_n: signed-input, unsigned-output saturating shift-right-narrow
// (lowers to llvm.arm.neon.vqshiftnsu; only signed source types exist).
// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 1);
}
11113
// vqsub/vqsubq: saturating subtract. These now lower to the target-independent
// llvm.ssub.sat/llvm.usub.sat intrinsics rather than ARM-specific ones; the
// stray [[TMP*]]/[[V*_V3_I]] bitcast checks are leftovers of the generic
// 8/16-byte casts emitted around the call and are intentionally unused.
// CHECK-LABEL: @test_vqsub_s8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: @test_vqsubq_s8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}
11261
// vraddhn: rounding add, returning the high half of each lane (narrowing).
// Signed and unsigned variants share the same llvm.arm.neon.vraddhn intrinsic.
// CHECK-LABEL: @test_vraddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}
11319
// vrecpe: reciprocal estimate (float and u32 forms); vrecps: Newton-Raphson
// reciprocal step. Both map 1:1 onto llvm.arm.neon.vrecpe / vrecps.
// CHECK-LABEL: @test_vrecpe_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRECPE_V1_I]]
float32x2_t test_vrecpe_f32(float32x2_t a) {
  return vrecpe_f32(a);
}

// CHECK-LABEL: @test_vrecpe_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
  return vrecpe_u32(a);
}

// CHECK-LABEL: @test_vrecpeq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
float32x4_t test_vrecpeq_f32(float32x4_t a) {
  return vrecpeq_f32(a);
}

// CHECK-LABEL: @test_vrecpeq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
  return vrecpeq_u32(a);
}

// CHECK-LABEL: @test_vrecps_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
  return vrecps_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
  return vrecpsq_f32(a, b);
}
11371
11372 // CHECK-LABEL: @test_vreinterpret_s8_s16(
11373 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11374 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s16(int16x4_t a)11375 int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
11376 return vreinterpret_s8_s16(a);
11377 }
11378
11379 // CHECK-LABEL: @test_vreinterpret_s8_s32(
11380 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11381 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s32(int32x2_t a)11382 int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
11383 return vreinterpret_s8_s32(a);
11384 }
11385
11386 // CHECK-LABEL: @test_vreinterpret_s8_s64(
11387 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11388 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s64(int64x1_t a)11389 int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
11390 return vreinterpret_s8_s64(a);
11391 }
11392
11393 // CHECK-LABEL: @test_vreinterpret_s8_u8(
11394 // CHECK: ret <8 x i8> %a
test_vreinterpret_s8_u8(uint8x8_t a)11395 int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
11396 return vreinterpret_s8_u8(a);
11397 }
11398
11399 // CHECK-LABEL: @test_vreinterpret_s8_u16(
11400 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11401 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u16(uint16x4_t a)11402 int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
11403 return vreinterpret_s8_u16(a);
11404 }
11405
11406 // CHECK-LABEL: @test_vreinterpret_s8_u32(
11407 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11408 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u32(uint32x2_t a)11409 int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
11410 return vreinterpret_s8_u32(a);
11411 }
11412
11413 // CHECK-LABEL: @test_vreinterpret_s8_u64(
11414 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11415 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u64(uint64x1_t a)11416 int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
11417 return vreinterpret_s8_u64(a);
11418 }
11419
11420 // CHECK-LABEL: @test_vreinterpret_s8_f16(
11421 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
11422 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_f16(float16x4_t a)11423 int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
11424 return vreinterpret_s8_f16(a);
11425 }
11426
11427 // CHECK-LABEL: @test_vreinterpret_s8_f32(
11428 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
11429 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_f32(float32x2_t a)11430 int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
11431 return vreinterpret_s8_f32(a);
11432 }
11433
11434 // CHECK-LABEL: @test_vreinterpret_s8_p8(
11435 // CHECK: ret <8 x i8> %a
test_vreinterpret_s8_p8(poly8x8_t a)11436 int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
11437 return vreinterpret_s8_p8(a);
11438 }
11439
11440 // CHECK-LABEL: @test_vreinterpret_s8_p16(
11441 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11442 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_p16(poly16x4_t a)11443 int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
11444 return vreinterpret_s8_p16(a);
11445 }
11446
11447 // CHECK-LABEL: @test_vreinterpret_s16_s8(
11448 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11449 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s8(int8x8_t a)11450 int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
11451 return vreinterpret_s16_s8(a);
11452 }
11453
11454 // CHECK-LABEL: @test_vreinterpret_s16_s32(
11455 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11456 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s32(int32x2_t a)11457 int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
11458 return vreinterpret_s16_s32(a);
11459 }
11460
11461 // CHECK-LABEL: @test_vreinterpret_s16_s64(
11462 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11463 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s64(int64x1_t a)11464 int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
11465 return vreinterpret_s16_s64(a);
11466 }
11467
11468 // CHECK-LABEL: @test_vreinterpret_s16_u8(
11469 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11470 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u8(uint8x8_t a)11471 int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
11472 return vreinterpret_s16_u8(a);
11473 }
11474
11475 // CHECK-LABEL: @test_vreinterpret_s16_u16(
11476 // CHECK: ret <4 x i16> %a
test_vreinterpret_s16_u16(uint16x4_t a)11477 int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
11478 return vreinterpret_s16_u16(a);
11479 }
11480
11481 // CHECK-LABEL: @test_vreinterpret_s16_u32(
11482 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11483 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u32(uint32x2_t a)11484 int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
11485 return vreinterpret_s16_u32(a);
11486 }
11487
11488 // CHECK-LABEL: @test_vreinterpret_s16_u64(
11489 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11490 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u64(uint64x1_t a)11491 int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
11492 return vreinterpret_s16_u64(a);
11493 }
11494
11495 // CHECK-LABEL: @test_vreinterpret_s16_f16(
11496 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
11497 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_f16(float16x4_t a)11498 int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
11499 return vreinterpret_s16_f16(a);
11500 }
11501
11502 // CHECK-LABEL: @test_vreinterpret_s16_f32(
11503 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
11504 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_f32(float32x2_t a)11505 int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
11506 return vreinterpret_s16_f32(a);
11507 }
11508
11509 // CHECK-LABEL: @test_vreinterpret_s16_p8(
11510 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11511 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_p8(poly8x8_t a)11512 int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
11513 return vreinterpret_s16_p8(a);
11514 }
11515
11516 // CHECK-LABEL: @test_vreinterpret_s16_p16(
11517 // CHECK: ret <4 x i16> %a
test_vreinterpret_s16_p16(poly16x4_t a)11518 int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
11519 return vreinterpret_s16_p16(a);
11520 }
11521
11522 // CHECK-LABEL: @test_vreinterpret_s32_s8(
11523 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11524 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s8(int8x8_t a)11525 int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
11526 return vreinterpret_s32_s8(a);
11527 }
11528
11529 // CHECK-LABEL: @test_vreinterpret_s32_s16(
11530 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11531 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s16(int16x4_t a)11532 int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
11533 return vreinterpret_s32_s16(a);
11534 }
11535
11536 // CHECK-LABEL: @test_vreinterpret_s32_s64(
11537 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11538 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s64(int64x1_t a)11539 int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
11540 return vreinterpret_s32_s64(a);
11541 }
11542
11543 // CHECK-LABEL: @test_vreinterpret_s32_u8(
11544 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11545 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u8(uint8x8_t a)11546 int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
11547 return vreinterpret_s32_u8(a);
11548 }
11549
11550 // CHECK-LABEL: @test_vreinterpret_s32_u16(
11551 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11552 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u16(uint16x4_t a)11553 int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
11554 return vreinterpret_s32_u16(a);
11555 }
11556
11557 // CHECK-LABEL: @test_vreinterpret_s32_u32(
11558 // CHECK: ret <2 x i32> %a
test_vreinterpret_s32_u32(uint32x2_t a)11559 int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
11560 return vreinterpret_s32_u32(a);
11561 }
11562
11563 // CHECK-LABEL: @test_vreinterpret_s32_u64(
11564 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11565 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u64(uint64x1_t a)11566 int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
11567 return vreinterpret_s32_u64(a);
11568 }
11569
11570 // CHECK-LABEL: @test_vreinterpret_s32_f16(
11571 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
11572 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_f16(float16x4_t a)11573 int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
11574 return vreinterpret_s32_f16(a);
11575 }
11576
11577 // CHECK-LABEL: @test_vreinterpret_s32_f32(
11578 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
11579 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_f32(float32x2_t a)11580 int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
11581 return vreinterpret_s32_f32(a);
11582 }
11583
11584 // CHECK-LABEL: @test_vreinterpret_s32_p8(
11585 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11586 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_p8(poly8x8_t a)11587 int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
11588 return vreinterpret_s32_p8(a);
11589 }
11590
11591 // CHECK-LABEL: @test_vreinterpret_s32_p16(
11592 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11593 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_p16(poly16x4_t a)11594 int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
11595 return vreinterpret_s32_p16(a);
11596 }
11597
11598 // CHECK-LABEL: @test_vreinterpret_s64_s8(
11599 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11600 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s8(int8x8_t a)11601 int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
11602 return vreinterpret_s64_s8(a);
11603 }
11604
11605 // CHECK-LABEL: @test_vreinterpret_s64_s16(
11606 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11607 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s16(int16x4_t a)11608 int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
11609 return vreinterpret_s64_s16(a);
11610 }
11611
11612 // CHECK-LABEL: @test_vreinterpret_s64_s32(
11613 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11614 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s32(int32x2_t a)11615 int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
11616 return vreinterpret_s64_s32(a);
11617 }
11618
11619 // CHECK-LABEL: @test_vreinterpret_s64_u8(
11620 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11621 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u8(uint8x8_t a)11622 int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
11623 return vreinterpret_s64_u8(a);
11624 }
11625
11626 // CHECK-LABEL: @test_vreinterpret_s64_u16(
11627 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11628 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u16(uint16x4_t a)11629 int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
11630 return vreinterpret_s64_u16(a);
11631 }
11632
11633 // CHECK-LABEL: @test_vreinterpret_s64_u32(
11634 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11635 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u32(uint32x2_t a)11636 int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
11637 return vreinterpret_s64_u32(a);
11638 }
11639
11640 // CHECK-LABEL: @test_vreinterpret_s64_u64(
11641 // CHECK: ret <1 x i64> %a
test_vreinterpret_s64_u64(uint64x1_t a)11642 int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
11643 return vreinterpret_s64_u64(a);
11644 }
11645
11646 // CHECK-LABEL: @test_vreinterpret_s64_f16(
11647 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
11648 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f16(float16x4_t a)11649 int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
11650 return vreinterpret_s64_f16(a);
11651 }
11652
11653 // CHECK-LABEL: @test_vreinterpret_s64_f32(
11654 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
11655 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f32(float32x2_t a)11656 int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
11657 return vreinterpret_s64_f32(a);
11658 }
11659
11660 // CHECK-LABEL: @test_vreinterpret_s64_p8(
11661 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11662 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p8(poly8x8_t a)11663 int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
11664 return vreinterpret_s64_p8(a);
11665 }
11666
11667 // CHECK-LABEL: @test_vreinterpret_s64_p16(
11668 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11669 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p16(poly16x4_t a)11670 int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
11671 return vreinterpret_s64_p16(a);
11672 }
11673
// [review] vreinterpret_u8_* coverage: reinterpret every 64-bit NEON vector
// type as uint8x8_t. Expected IR is a single bitcast to <8 x i8>, except when
// the source is already <8 x i8> (s8, p8), where the value is returned as-is.
// CHECK lines are functional FileCheck directives; do not edit by hand.
11674 // CHECK-LABEL: @test_vreinterpret_u8_s8(
11675 // CHECK: ret <8 x i8> %a
test_vreinterpret_u8_s8(int8x8_t a)11676 uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
11677 return vreinterpret_u8_s8(a);
11678 }
11679
11680 // CHECK-LABEL: @test_vreinterpret_u8_s16(
11681 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11682 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s16(int16x4_t a)11683 uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
11684 return vreinterpret_u8_s16(a);
11685 }
11686
11687 // CHECK-LABEL: @test_vreinterpret_u8_s32(
11688 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11689 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s32(int32x2_t a)11690 uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
11691 return vreinterpret_u8_s32(a);
11692 }
11693
11694 // CHECK-LABEL: @test_vreinterpret_u8_s64(
11695 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11696 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s64(int64x1_t a)11697 uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
11698 return vreinterpret_u8_s64(a);
11699 }
11700
11701 // CHECK-LABEL: @test_vreinterpret_u8_u16(
11702 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11703 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u16(uint16x4_t a)11704 uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
11705 return vreinterpret_u8_u16(a);
11706 }
11707
11708 // CHECK-LABEL: @test_vreinterpret_u8_u32(
11709 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11710 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u32(uint32x2_t a)11711 uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
11712 return vreinterpret_u8_u32(a);
11713 }
11714
11715 // CHECK-LABEL: @test_vreinterpret_u8_u64(
11716 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11717 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u64(uint64x1_t a)11718 uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
11719 return vreinterpret_u8_u64(a);
11720 }
11721
11722 // CHECK-LABEL: @test_vreinterpret_u8_f16(
11723 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
11724 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f16(float16x4_t a)11725 uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
11726 return vreinterpret_u8_f16(a);
11727 }
11728
11729 // CHECK-LABEL: @test_vreinterpret_u8_f32(
11730 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
11731 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f32(float32x2_t a)11732 uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
11733 return vreinterpret_u8_f32(a);
11734 }
11735
11736 // CHECK-LABEL: @test_vreinterpret_u8_p8(
11737 // CHECK: ret <8 x i8> %a
test_vreinterpret_u8_p8(poly8x8_t a)11738 uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
11739 return vreinterpret_u8_p8(a);
11740 }
11741
11742 // CHECK-LABEL: @test_vreinterpret_u8_p16(
11743 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11744 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_p16(poly16x4_t a)11745 uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
11746 return vreinterpret_u8_p16(a);
11747 }
11748
// [review] vreinterpret_u16_* coverage: reinterpret each 64-bit vector type
// as uint16x4_t. Expected IR: one bitcast to <4 x i16>, or a plain return for
// the types that are already <4 x i16> in IR (s16, p16).
11749 // CHECK-LABEL: @test_vreinterpret_u16_s8(
11750 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11751 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s8(int8x8_t a)11752 uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
11753 return vreinterpret_u16_s8(a);
11754 }
11755
11756 // CHECK-LABEL: @test_vreinterpret_u16_s16(
11757 // CHECK: ret <4 x i16> %a
test_vreinterpret_u16_s16(int16x4_t a)11758 uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
11759 return vreinterpret_u16_s16(a);
11760 }
11761
11762 // CHECK-LABEL: @test_vreinterpret_u16_s32(
11763 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11764 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s32(int32x2_t a)11765 uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
11766 return vreinterpret_u16_s32(a);
11767 }
11768
11769 // CHECK-LABEL: @test_vreinterpret_u16_s64(
11770 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11771 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s64(int64x1_t a)11772 uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
11773 return vreinterpret_u16_s64(a);
11774 }
11775
11776 // CHECK-LABEL: @test_vreinterpret_u16_u8(
11777 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11778 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u8(uint8x8_t a)11779 uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
11780 return vreinterpret_u16_u8(a);
11781 }
11782
11783 // CHECK-LABEL: @test_vreinterpret_u16_u32(
11784 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11785 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u32(uint32x2_t a)11786 uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
11787 return vreinterpret_u16_u32(a);
11788 }
11789
11790 // CHECK-LABEL: @test_vreinterpret_u16_u64(
11791 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11792 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u64(uint64x1_t a)11793 uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
11794 return vreinterpret_u16_u64(a);
11795 }
11796
11797 // CHECK-LABEL: @test_vreinterpret_u16_f16(
11798 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
11799 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f16(float16x4_t a)11800 uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
11801 return vreinterpret_u16_f16(a);
11802 }
11803
11804 // CHECK-LABEL: @test_vreinterpret_u16_f32(
11805 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
11806 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f32(float32x2_t a)11807 uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
11808 return vreinterpret_u16_f32(a);
11809 }
11810
11811 // CHECK-LABEL: @test_vreinterpret_u16_p8(
11812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11813 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_p8(poly8x8_t a)11814 uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
11815 return vreinterpret_u16_p8(a);
11816 }
11817
11818 // CHECK-LABEL: @test_vreinterpret_u16_p16(
11819 // CHECK: ret <4 x i16> %a
test_vreinterpret_u16_p16(poly16x4_t a)11820 uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
11821 return vreinterpret_u16_p16(a);
11822 }
11823
// [review] vreinterpret_u32_* coverage: reinterpret each 64-bit vector type
// as uint32x2_t. Expected IR: one bitcast to <2 x i32>, or a plain return for
// the s32 case whose IR type already matches.
11824 // CHECK-LABEL: @test_vreinterpret_u32_s8(
11825 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11826 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s8(int8x8_t a)11827 uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
11828 return vreinterpret_u32_s8(a);
11829 }
11830
11831 // CHECK-LABEL: @test_vreinterpret_u32_s16(
11832 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11833 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s16(int16x4_t a)11834 uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
11835 return vreinterpret_u32_s16(a);
11836 }
11837
11838 // CHECK-LABEL: @test_vreinterpret_u32_s32(
11839 // CHECK: ret <2 x i32> %a
test_vreinterpret_u32_s32(int32x2_t a)11840 uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
11841 return vreinterpret_u32_s32(a);
11842 }
11843
11844 // CHECK-LABEL: @test_vreinterpret_u32_s64(
11845 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11846 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s64(int64x1_t a)11847 uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
11848 return vreinterpret_u32_s64(a);
11849 }
11850
11851 // CHECK-LABEL: @test_vreinterpret_u32_u8(
11852 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11853 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u8(uint8x8_t a)11854 uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
11855 return vreinterpret_u32_u8(a);
11856 }
11857
11858 // CHECK-LABEL: @test_vreinterpret_u32_u16(
11859 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11860 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u16(uint16x4_t a)11861 uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
11862 return vreinterpret_u32_u16(a);
11863 }
11864
11865 // CHECK-LABEL: @test_vreinterpret_u32_u64(
11866 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11867 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u64(uint64x1_t a)11868 uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
11869 return vreinterpret_u32_u64(a);
11870 }
11871
11872 // CHECK-LABEL: @test_vreinterpret_u32_f16(
11873 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
11874 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f16(float16x4_t a)11875 uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
11876 return vreinterpret_u32_f16(a);
11877 }
11878
11879 // CHECK-LABEL: @test_vreinterpret_u32_f32(
11880 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
11881 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f32(float32x2_t a)11882 uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
11883 return vreinterpret_u32_f32(a);
11884 }
11885
11886 // CHECK-LABEL: @test_vreinterpret_u32_p8(
11887 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11888 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p8(poly8x8_t a)11889 uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
11890 return vreinterpret_u32_p8(a);
11891 }
11892
11893 // CHECK-LABEL: @test_vreinterpret_u32_p16(
11894 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11895 // CHECK: ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p16(poly16x4_t a)11896 uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
11897 return vreinterpret_u32_p16(a);
11898 }
11899
// [review] vreinterpret_u64_* coverage: reinterpret each 64-bit vector type
// as uint64x1_t. Expected IR: one bitcast to <1 x i64>, or a plain return for
// the s64 case whose IR type already matches.
11900 // CHECK-LABEL: @test_vreinterpret_u64_s8(
11901 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11902 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s8(int8x8_t a)11903 uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
11904 return vreinterpret_u64_s8(a);
11905 }
11906
11907 // CHECK-LABEL: @test_vreinterpret_u64_s16(
11908 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11909 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s16(int16x4_t a)11910 uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
11911 return vreinterpret_u64_s16(a);
11912 }
11913
11914 // CHECK-LABEL: @test_vreinterpret_u64_s32(
11915 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11916 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s32(int32x2_t a)11917 uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
11918 return vreinterpret_u64_s32(a);
11919 }
11920
11921 // CHECK-LABEL: @test_vreinterpret_u64_s64(
11922 // CHECK: ret <1 x i64> %a
test_vreinterpret_u64_s64(int64x1_t a)11923 uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
11924 return vreinterpret_u64_s64(a);
11925 }
11926
11927 // CHECK-LABEL: @test_vreinterpret_u64_u8(
11928 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11929 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u8(uint8x8_t a)11930 uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
11931 return vreinterpret_u64_u8(a);
11932 }
11933
11934 // CHECK-LABEL: @test_vreinterpret_u64_u16(
11935 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11936 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u16(uint16x4_t a)11937 uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
11938 return vreinterpret_u64_u16(a);
11939 }
11940
11941 // CHECK-LABEL: @test_vreinterpret_u64_u32(
11942 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11943 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u32(uint32x2_t a)11944 uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
11945 return vreinterpret_u64_u32(a);
11946 }
11947
11948 // CHECK-LABEL: @test_vreinterpret_u64_f16(
11949 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
11950 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f16(float16x4_t a)11951 uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
11952 return vreinterpret_u64_f16(a);
11953 }
11954
11955 // CHECK-LABEL: @test_vreinterpret_u64_f32(
11956 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
11957 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f32(float32x2_t a)11958 uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
11959 return vreinterpret_u64_f32(a);
11960 }
11961
11962 // CHECK-LABEL: @test_vreinterpret_u64_p8(
11963 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11964 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p8(poly8x8_t a)11965 uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
11966 return vreinterpret_u64_p8(a);
11967 }
11968
11969 // CHECK-LABEL: @test_vreinterpret_u64_p16(
11970 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11971 // CHECK: ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p16(poly16x4_t a)11972 uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
11973 return vreinterpret_u64_p16(a);
11974 }
11975
// [review] vreinterpret_f16_* coverage: reinterpret each 64-bit vector type
// as float16x4_t (<4 x half> in IR); always a single bitcast here since no
// other source type shares that IR type. Requires the +fullfp16 / half-args
// options from the RUN lines at the top of the file.
11976 // CHECK-LABEL: @test_vreinterpret_f16_s8(
11977 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
11978 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s8(int8x8_t a)11979 float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
11980 return vreinterpret_f16_s8(a);
11981 }
11982
11983 // CHECK-LABEL: @test_vreinterpret_f16_s16(
11984 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
11985 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s16(int16x4_t a)11986 float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
11987 return vreinterpret_f16_s16(a);
11988 }
11989
11990 // CHECK-LABEL: @test_vreinterpret_f16_s32(
11991 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
11992 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s32(int32x2_t a)11993 float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
11994 return vreinterpret_f16_s32(a);
11995 }
11996
11997 // CHECK-LABEL: @test_vreinterpret_f16_s64(
11998 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
11999 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s64(int64x1_t a)12000 float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
12001 return vreinterpret_f16_s64(a);
12002 }
12003
12004 // CHECK-LABEL: @test_vreinterpret_f16_u8(
12005 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12006 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u8(uint8x8_t a)12007 float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
12008 return vreinterpret_f16_u8(a);
12009 }
12010
12011 // CHECK-LABEL: @test_vreinterpret_f16_u16(
12012 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12013 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u16(uint16x4_t a)12014 float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
12015 return vreinterpret_f16_u16(a);
12016 }
12017
12018 // CHECK-LABEL: @test_vreinterpret_f16_u32(
12019 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
12020 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u32(uint32x2_t a)12021 float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
12022 return vreinterpret_f16_u32(a);
12023 }
12024
12025 // CHECK-LABEL: @test_vreinterpret_f16_u64(
12026 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
12027 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u64(uint64x1_t a)12028 float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
12029 return vreinterpret_f16_u64(a);
12030 }
12031
12032 // CHECK-LABEL: @test_vreinterpret_f16_f32(
12033 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
12034 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_f32(float32x2_t a)12035 float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
12036 return vreinterpret_f16_f32(a);
12037 }
12038
12039 // CHECK-LABEL: @test_vreinterpret_f16_p8(
12040 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12041 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_p8(poly8x8_t a)12042 float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
12043 return vreinterpret_f16_p8(a);
12044 }
12045
12046 // CHECK-LABEL: @test_vreinterpret_f16_p16(
12047 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12048 // CHECK: ret <4 x half> [[TMP0]]
test_vreinterpret_f16_p16(poly16x4_t a)12049 float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
12050 return vreinterpret_f16_p16(a);
12051 }
12052
// [review] vreinterpret_f32_* coverage: reinterpret each 64-bit vector type
// as float32x2_t (<2 x float> in IR); always a single bitcast since no other
// source type in this group shares that IR type.
12053 // CHECK-LABEL: @test_vreinterpret_f32_s8(
12054 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12055 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s8(int8x8_t a)12056 float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
12057 return vreinterpret_f32_s8(a);
12058 }
12059
12060 // CHECK-LABEL: @test_vreinterpret_f32_s16(
12061 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12062 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s16(int16x4_t a)12063 float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
12064 return vreinterpret_f32_s16(a);
12065 }
12066
12067 // CHECK-LABEL: @test_vreinterpret_f32_s32(
12068 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
12069 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s32(int32x2_t a)12070 float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
12071 return vreinterpret_f32_s32(a);
12072 }
12073
12074 // CHECK-LABEL: @test_vreinterpret_f32_s64(
12075 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
12076 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s64(int64x1_t a)12077 float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
12078 return vreinterpret_f32_s64(a);
12079 }
12080
12081 // CHECK-LABEL: @test_vreinterpret_f32_u8(
12082 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12083 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u8(uint8x8_t a)12084 float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
12085 return vreinterpret_f32_u8(a);
12086 }
12087
12088 // CHECK-LABEL: @test_vreinterpret_f32_u16(
12089 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12090 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u16(uint16x4_t a)12091 float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
12092 return vreinterpret_f32_u16(a);
12093 }
12094
12095 // CHECK-LABEL: @test_vreinterpret_f32_u32(
12096 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
12097 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u32(uint32x2_t a)12098 float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
12099 return vreinterpret_f32_u32(a);
12100 }
12101
12102 // CHECK-LABEL: @test_vreinterpret_f32_u64(
12103 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
12104 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u64(uint64x1_t a)12105 float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
12106 return vreinterpret_f32_u64(a);
12107 }
12108
12109 // CHECK-LABEL: @test_vreinterpret_f32_f16(
12110 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
12111 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_f16(float16x4_t a)12112 float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
12113 return vreinterpret_f32_f16(a);
12114 }
12115
12116 // CHECK-LABEL: @test_vreinterpret_f32_p8(
12117 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12118 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p8(poly8x8_t a)12119 float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
12120 return vreinterpret_f32_p8(a);
12121 }
12122
12123 // CHECK-LABEL: @test_vreinterpret_f32_p16(
12124 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12125 // CHECK: ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p16(poly16x4_t a)12126 float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
12127 return vreinterpret_f32_p16(a);
12128 }
12129
// [review] vreinterpret_p8_* coverage: reinterpret each 64-bit vector type as
// poly8x8_t. Expected IR: one bitcast to <8 x i8>, or a plain return for the
// types already represented as <8 x i8> (s8, u8).
12130 // CHECK-LABEL: @test_vreinterpret_p8_s8(
12131 // CHECK: ret <8 x i8> %a
test_vreinterpret_p8_s8(int8x8_t a)12132 poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
12133 return vreinterpret_p8_s8(a);
12134 }
12135
12136 // CHECK-LABEL: @test_vreinterpret_p8_s16(
12137 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
12138 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s16(int16x4_t a)12139 poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
12140 return vreinterpret_p8_s16(a);
12141 }
12142
12143 // CHECK-LABEL: @test_vreinterpret_p8_s32(
12144 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
12145 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s32(int32x2_t a)12146 poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
12147 return vreinterpret_p8_s32(a);
12148 }
12149
12150 // CHECK-LABEL: @test_vreinterpret_p8_s64(
12151 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
12152 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s64(int64x1_t a)12153 poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
12154 return vreinterpret_p8_s64(a);
12155 }
12156
12157 // CHECK-LABEL: @test_vreinterpret_p8_u8(
12158 // CHECK: ret <8 x i8> %a
test_vreinterpret_p8_u8(uint8x8_t a)12159 poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
12160 return vreinterpret_p8_u8(a);
12161 }
12162
12163 // CHECK-LABEL: @test_vreinterpret_p8_u16(
12164 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
12165 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_u16(uint16x4_t a)12166 poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
12167 return vreinterpret_p8_u16(a);
12168 }
12169
12170 // CHECK-LABEL: @test_vreinterpret_p8_u32(
12171 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
12172 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_u32(uint32x2_t a)12173 poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
12174 return vreinterpret_p8_u32(a);
12175 }
12176
12177 // CHECK-LABEL: @test_vreinterpret_p8_u64(
12178 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
12179 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_u64(uint64x1_t a)12180 poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
12181 return vreinterpret_p8_u64(a);
12182 }
12183
12184 // CHECK-LABEL: @test_vreinterpret_p8_f16(
12185 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
12186 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_f16(float16x4_t a)12187 poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
12188 return vreinterpret_p8_f16(a);
12189 }
12190
12191 // CHECK-LABEL: @test_vreinterpret_p8_f32(
12192 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
12193 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_f32(float32x2_t a)12194 poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
12195 return vreinterpret_p8_f32(a);
12196 }
12197
12198 // CHECK-LABEL: @test_vreinterpret_p8_p16(
12199 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
12200 // CHECK: ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_p16(poly16x4_t a)12201 poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
12202 return vreinterpret_p8_p16(a);
12203 }
12204
// [review] vreinterpret_p16_* coverage: reinterpret each 64-bit vector type
// as poly16x4_t. Expected IR: one bitcast to <4 x i16>, or a plain return for
// the types already represented as <4 x i16> (s16, u16).
12205 // CHECK-LABEL: @test_vreinterpret_p16_s8(
12206 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
12207 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_s8(int8x8_t a)12208 poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
12209 return vreinterpret_p16_s8(a);
12210 }
12211
12212 // CHECK-LABEL: @test_vreinterpret_p16_s16(
12213 // CHECK: ret <4 x i16> %a
test_vreinterpret_p16_s16(int16x4_t a)12214 poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
12215 return vreinterpret_p16_s16(a);
12216 }
12217
12218 // CHECK-LABEL: @test_vreinterpret_p16_s32(
12219 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
12220 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_s32(int32x2_t a)12221 poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
12222 return vreinterpret_p16_s32(a);
12223 }
12224
12225 // CHECK-LABEL: @test_vreinterpret_p16_s64(
12226 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
12227 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_s64(int64x1_t a)12228 poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
12229 return vreinterpret_p16_s64(a);
12230 }
12231
12232 // CHECK-LABEL: @test_vreinterpret_p16_u8(
12233 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
12234 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_u8(uint8x8_t a)12235 poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
12236 return vreinterpret_p16_u8(a);
12237 }
12238
12239 // CHECK-LABEL: @test_vreinterpret_p16_u16(
12240 // CHECK: ret <4 x i16> %a
test_vreinterpret_p16_u16(uint16x4_t a)12241 poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
12242 return vreinterpret_p16_u16(a);
12243 }
12244
12245 // CHECK-LABEL: @test_vreinterpret_p16_u32(
12246 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
12247 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_u32(uint32x2_t a)12248 poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
12249 return vreinterpret_p16_u32(a);
12250 }
12251
12252 // CHECK-LABEL: @test_vreinterpret_p16_u64(
12253 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
12254 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_u64(uint64x1_t a)12255 poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
12256 return vreinterpret_p16_u64(a);
12257 }
12258
12259 // CHECK-LABEL: @test_vreinterpret_p16_f16(
12260 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
12261 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_f16(float16x4_t a)12262 poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
12263 return vreinterpret_p16_f16(a);
12264 }
12265
12266 // CHECK-LABEL: @test_vreinterpret_p16_f32(
12267 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
12268 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_f32(float32x2_t a)12269 poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
12270 return vreinterpret_p16_f32(a);
12271 }
12272
12273 // CHECK-LABEL: @test_vreinterpret_p16_p8(
12274 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
12275 // CHECK: ret <4 x i16> [[TMP0]]
test_vreinterpret_p16_p8(poly8x8_t a)12276 poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
12277 return vreinterpret_p16_p8(a);
12278 }
12279
// [review] vreinterpretq_s8_* coverage: 128-bit (q) variants reinterpreting
// each quad vector type as int8x16_t. Expected IR: one bitcast to <16 x i8>,
// or a plain return for sources already <16 x i8> (u8, p8).
12280 // CHECK-LABEL: @test_vreinterpretq_s8_s16(
12281 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12282 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s16(int16x8_t a)12283 int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
12284 return vreinterpretq_s8_s16(a);
12285 }
12286
12287 // CHECK-LABEL: @test_vreinterpretq_s8_s32(
12288 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12289 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s32(int32x4_t a)12290 int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
12291 return vreinterpretq_s8_s32(a);
12292 }
12293
12294 // CHECK-LABEL: @test_vreinterpretq_s8_s64(
12295 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12296 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_s64(int64x2_t a)12297 int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
12298 return vreinterpretq_s8_s64(a);
12299 }
12300
12301 // CHECK-LABEL: @test_vreinterpretq_s8_u8(
12302 // CHECK: ret <16 x i8> %a
test_vreinterpretq_s8_u8(uint8x16_t a)12303 int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
12304 return vreinterpretq_s8_u8(a);
12305 }
12306
12307 // CHECK-LABEL: @test_vreinterpretq_s8_u16(
12308 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12309 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u16(uint16x8_t a)12310 int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
12311 return vreinterpretq_s8_u16(a);
12312 }
12313
12314 // CHECK-LABEL: @test_vreinterpretq_s8_u32(
12315 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12316 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u32(uint32x4_t a)12317 int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
12318 return vreinterpretq_s8_u32(a);
12319 }
12320
12321 // CHECK-LABEL: @test_vreinterpretq_s8_u64(
12322 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12323 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_u64(uint64x2_t a)12324 int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
12325 return vreinterpretq_s8_u64(a);
12326 }
12327
12328 // CHECK-LABEL: @test_vreinterpretq_s8_f16(
12329 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
12330 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f16(float16x8_t a)12331 int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
12332 return vreinterpretq_s8_f16(a);
12333 }
12334
12335 // CHECK-LABEL: @test_vreinterpretq_s8_f32(
12336 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
12337 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_f32(float32x4_t a)12338 int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
12339 return vreinterpretq_s8_f32(a);
12340 }
12341
12342 // CHECK-LABEL: @test_vreinterpretq_s8_p8(
12343 // CHECK: ret <16 x i8> %a
test_vreinterpretq_s8_p8(poly8x16_t a)12344 int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
12345 return vreinterpretq_s8_p8(a);
12346 }
12347
12348 // CHECK-LABEL: @test_vreinterpretq_s8_p16(
12349 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12350 // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p16(poly16x8_t a)12351 int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
12352 return vreinterpretq_s8_p16(a);
12353 }
12354
// [review] vreinterpretq_s16_* coverage: reinterpret each 128-bit vector type
// as int16x8_t. Expected IR: one bitcast to <8 x i16>, or a plain return for
// sources already <8 x i16> (u16, p16).
12355 // CHECK-LABEL: @test_vreinterpretq_s16_s8(
12356 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12357 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s8(int8x16_t a)12358 int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
12359 return vreinterpretq_s16_s8(a);
12360 }
12361
12362 // CHECK-LABEL: @test_vreinterpretq_s16_s32(
12363 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
12364 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s32(int32x4_t a)12365 int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
12366 return vreinterpretq_s16_s32(a);
12367 }
12368
12369 // CHECK-LABEL: @test_vreinterpretq_s16_s64(
12370 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
12371 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_s64(int64x2_t a)12372 int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
12373 return vreinterpretq_s16_s64(a);
12374 }
12375
12376 // CHECK-LABEL: @test_vreinterpretq_s16_u8(
12377 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12378 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u8(uint8x16_t a)12379 int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
12380 return vreinterpretq_s16_u8(a);
12381 }
12382
12383 // CHECK-LABEL: @test_vreinterpretq_s16_u16(
12384 // CHECK: ret <8 x i16> %a
test_vreinterpretq_s16_u16(uint16x8_t a)12385 int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
12386 return vreinterpretq_s16_u16(a);
12387 }
12388
12389 // CHECK-LABEL: @test_vreinterpretq_s16_u32(
12390 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
12391 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u32(uint32x4_t a)12392 int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
12393 return vreinterpretq_s16_u32(a);
12394 }
12395
12396 // CHECK-LABEL: @test_vreinterpretq_s16_u64(
12397 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
12398 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_u64(uint64x2_t a)12399 int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
12400 return vreinterpretq_s16_u64(a);
12401 }
12402
12403 // CHECK-LABEL: @test_vreinterpretq_s16_f16(
12404 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
12405 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f16(float16x8_t a)12406 int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
12407 return vreinterpretq_s16_f16(a);
12408 }
12409
12410 // CHECK-LABEL: @test_vreinterpretq_s16_f32(
12411 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
12412 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_f32(float32x4_t a)12413 int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
12414 return vreinterpretq_s16_f32(a);
12415 }
12416
12417 // CHECK-LABEL: @test_vreinterpretq_s16_p8(
12418 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12419 // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_p8(poly8x16_t a)12420 int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
12421 return vreinterpretq_s16_p8(a);
12422 }
12423
12424 // CHECK-LABEL: @test_vreinterpretq_s16_p16(
12425 // CHECK: ret <8 x i16> %a
test_vreinterpretq_s16_p16(poly16x8_t a)12426 int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
12427 return vreinterpretq_s16_p16(a);
12428 }
12429
// vreinterpretq_s32_* casts: reinterpret a 128-bit vector as int32x4_t.
// One IR bitcast, except the u32 source which is already <4 x i32>
// and returns %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}
12505
// vreinterpretq_s64_* casts: reinterpret a 128-bit vector as int64x2_t.
// One IR bitcast, except the u64 source which is already <2 x i64>
// and returns %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}
12581
// vreinterpretq_u8_* casts: reinterpret a 128-bit vector as uint8x16_t.
// One IR bitcast, except the s8/p8 sources which are already <16 x i8>
// and return %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}
12656
// vreinterpretq_u16_* casts: reinterpret a 128-bit vector as uint16x8_t.
// One IR bitcast, except the s16/p16 sources which are already <8 x i16>
// and return %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}
12731
// vreinterpretq_u32_* casts: reinterpret a 128-bit vector as uint32x4_t.
// One IR bitcast, except the s32 source which is already <4 x i32>
// and returns %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}
12807
// vreinterpretq_u64_* casts: reinterpret a 128-bit vector as uint64x2_t.
// One IR bitcast, except the s64 source which is already <2 x i64>
// and returns %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}
12883
// vreinterpretq_f16_* casts: reinterpret a 128-bit vector as float16x8_t.
// Every source type here has a different IR type than <8 x half>, so
// each case lowers to exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}
12960
// vreinterpretq_f32_* casts: reinterpret a 128-bit vector as float32x4_t.
// Every source type here has a different IR type than <4 x float>, so
// each case lowers to exactly one bitcast.
// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}
13037
// vreinterpretq_p8_* casts: reinterpret a 128-bit vector as poly8x16_t.
// One IR bitcast, except the s8/u8 sources which are already <16 x i8>
// and return %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}
13112
// vreinterpretq_p16_* casts: reinterpret a 128-bit vector as poly16x8_t.
// One IR bitcast, except the s16/u16 sources which are already <8 x i16>
// and return %a unchanged.
// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}
13187
13188 // CHECK-LABEL: @test_vrev16_s8(
13189 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13190 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev16_s8(int8x8_t a)13191 int8x8_t test_vrev16_s8(int8x8_t a) {
13192 return vrev16_s8(a);
13193 }
13194
13195 // CHECK-LABEL: @test_vrev16_u8(
13196 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13197 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev16_u8(uint8x8_t a)13198 uint8x8_t test_vrev16_u8(uint8x8_t a) {
13199 return vrev16_u8(a);
13200 }
13201
13202 // CHECK-LABEL: @test_vrev16_p8(
13203 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13204 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev16_p8(poly8x8_t a)13205 poly8x8_t test_vrev16_p8(poly8x8_t a) {
13206 return vrev16_p8(a);
13207 }
13208
13209 // CHECK-LABEL: @test_vrev16q_s8(
13210 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
13211 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev16q_s8(int8x16_t a)13212 int8x16_t test_vrev16q_s8(int8x16_t a) {
13213 return vrev16q_s8(a);
13214 }
13215
13216 // CHECK-LABEL: @test_vrev16q_u8(
13217 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
13218 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev16q_u8(uint8x16_t a)13219 uint8x16_t test_vrev16q_u8(uint8x16_t a) {
13220 return vrev16q_u8(a);
13221 }
13222
13223 // CHECK-LABEL: @test_vrev16q_p8(
13224 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
13225 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev16q_p8(poly8x16_t a)13226 poly8x16_t test_vrev16q_p8(poly8x16_t a) {
13227 return vrev16q_p8(a);
13228 }
13229
13230 // CHECK-LABEL: @test_vrev32_s8(
13231 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13232 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev32_s8(int8x8_t a)13233 int8x8_t test_vrev32_s8(int8x8_t a) {
13234 return vrev32_s8(a);
13235 }
13236
13237 // CHECK-LABEL: @test_vrev32_s16(
13238 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13239 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev32_s16(int16x4_t a)13240 int16x4_t test_vrev32_s16(int16x4_t a) {
13241 return vrev32_s16(a);
13242 }
13243
13244 // CHECK-LABEL: @test_vrev32_u8(
13245 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13246 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev32_u8(uint8x8_t a)13247 uint8x8_t test_vrev32_u8(uint8x8_t a) {
13248 return vrev32_u8(a);
13249 }
13250
13251 // CHECK-LABEL: @test_vrev32_u16(
13252 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13253 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev32_u16(uint16x4_t a)13254 uint16x4_t test_vrev32_u16(uint16x4_t a) {
13255 return vrev32_u16(a);
13256 }
13257
13258 // CHECK-LABEL: @test_vrev32_p8(
13259 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13260 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev32_p8(poly8x8_t a)13261 poly8x8_t test_vrev32_p8(poly8x8_t a) {
13262 return vrev32_p8(a);
13263 }
13264
13265 // CHECK-LABEL: @test_vrev32_p16(
13266 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13267 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev32_p16(poly16x4_t a)13268 poly16x4_t test_vrev32_p16(poly16x4_t a) {
13269 return vrev32_p16(a);
13270 }
13271
13272 // CHECK-LABEL: @test_vrev32q_s8(
13273 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
13274 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev32q_s8(int8x16_t a)13275 int8x16_t test_vrev32q_s8(int8x16_t a) {
13276 return vrev32q_s8(a);
13277 }
13278
13279 // CHECK-LABEL: @test_vrev32q_s16(
13280 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13281 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev32q_s16(int16x8_t a)13282 int16x8_t test_vrev32q_s16(int16x8_t a) {
13283 return vrev32q_s16(a);
13284 }
13285
13286 // CHECK-LABEL: @test_vrev32q_u8(
13287 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
13288 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev32q_u8(uint8x16_t a)13289 uint8x16_t test_vrev32q_u8(uint8x16_t a) {
13290 return vrev32q_u8(a);
13291 }
13292
13293 // CHECK-LABEL: @test_vrev32q_u16(
13294 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13295 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev32q_u16(uint16x8_t a)13296 uint16x8_t test_vrev32q_u16(uint16x8_t a) {
13297 return vrev32q_u16(a);
13298 }
13299
13300 // CHECK-LABEL: @test_vrev32q_p8(
13301 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
13302 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev32q_p8(poly8x16_t a)13303 poly8x16_t test_vrev32q_p8(poly8x16_t a) {
13304 return vrev32q_p8(a);
13305 }
13306
13307 // CHECK-LABEL: @test_vrev32q_p16(
13308 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
13309 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev32q_p16(poly16x8_t a)13310 poly16x8_t test_vrev32q_p16(poly16x8_t a) {
13311 return vrev32q_p16(a);
13312 }
13313
13314 // CHECK-LABEL: @test_vrev64_s8(
13315 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
13316 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev64_s8(int8x8_t a)13317 int8x8_t test_vrev64_s8(int8x8_t a) {
13318 return vrev64_s8(a);
13319 }
13320
13321 // CHECK-LABEL: @test_vrev64_s16(
13322 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13323 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev64_s16(int16x4_t a)13324 int16x4_t test_vrev64_s16(int16x4_t a) {
13325 return vrev64_s16(a);
13326 }
13327
13328 // CHECK-LABEL: @test_vrev64_s32(
13329 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
13330 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
test_vrev64_s32(int32x2_t a)13331 int32x2_t test_vrev64_s32(int32x2_t a) {
13332 return vrev64_s32(a);
13333 }
13334
13335 // CHECK-LABEL: @test_vrev64_u8(
13336 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
13337 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev64_u8(uint8x8_t a)13338 uint8x8_t test_vrev64_u8(uint8x8_t a) {
13339 return vrev64_u8(a);
13340 }
13341
13342 // CHECK-LABEL: @test_vrev64_u16(
13343 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13344 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev64_u16(uint16x4_t a)13345 uint16x4_t test_vrev64_u16(uint16x4_t a) {
13346 return vrev64_u16(a);
13347 }
13348
13349 // CHECK-LABEL: @test_vrev64_u32(
13350 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
13351 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
test_vrev64_u32(uint32x2_t a)13352 uint32x2_t test_vrev64_u32(uint32x2_t a) {
13353 return vrev64_u32(a);
13354 }
13355
13356 // CHECK-LABEL: @test_vrev64_p8(
13357 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
13358 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
test_vrev64_p8(poly8x8_t a)13359 poly8x8_t test_vrev64_p8(poly8x8_t a) {
13360 return vrev64_p8(a);
13361 }
13362
13363 // CHECK-LABEL: @test_vrev64_p16(
13364 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13365 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
test_vrev64_p16(poly16x4_t a)13366 poly16x4_t test_vrev64_p16(poly16x4_t a) {
13367 return vrev64_p16(a);
13368 }
13369
13370 // CHECK-LABEL: @test_vrev64_f32(
13371 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
13372 // CHECK: ret <2 x float> [[SHUFFLE_I]]
test_vrev64_f32(float32x2_t a)13373 float32x2_t test_vrev64_f32(float32x2_t a) {
13374 return vrev64_f32(a);
13375 }
13376
13377 // CHECK-LABEL: @test_vrev64q_s8(
13378 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
13379 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev64q_s8(int8x16_t a)13380 int8x16_t test_vrev64q_s8(int8x16_t a) {
13381 return vrev64q_s8(a);
13382 }
13383
13384 // CHECK-LABEL: @test_vrev64q_s16(
13385 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13386 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev64q_s16(int16x8_t a)13387 int16x8_t test_vrev64q_s16(int16x8_t a) {
13388 return vrev64q_s16(a);
13389 }
13390
13391 // CHECK-LABEL: @test_vrev64q_s32(
13392 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13393 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vrev64q_s32(int32x4_t a)13394 int32x4_t test_vrev64q_s32(int32x4_t a) {
13395 return vrev64q_s32(a);
13396 }
13397
13398 // CHECK-LABEL: @test_vrev64q_u8(
13399 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
13400 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev64q_u8(uint8x16_t a)13401 uint8x16_t test_vrev64q_u8(uint8x16_t a) {
13402 return vrev64q_u8(a);
13403 }
13404
13405 // CHECK-LABEL: @test_vrev64q_u16(
13406 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13407 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev64q_u16(uint16x8_t a)13408 uint16x8_t test_vrev64q_u16(uint16x8_t a) {
13409 return vrev64q_u16(a);
13410 }
13411
13412 // CHECK-LABEL: @test_vrev64q_u32(
13413 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13414 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
test_vrev64q_u32(uint32x4_t a)13415 uint32x4_t test_vrev64q_u32(uint32x4_t a) {
13416 return vrev64q_u32(a);
13417 }
13418
13419 // CHECK-LABEL: @test_vrev64q_p8(
13420 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
13421 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
test_vrev64q_p8(poly8x16_t a)13422 poly8x16_t test_vrev64q_p8(poly8x16_t a) {
13423 return vrev64q_p8(a);
13424 }
13425
13426 // CHECK-LABEL: @test_vrev64q_p16(
13427 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
13428 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
test_vrev64q_p16(poly16x8_t a)13429 poly16x8_t test_vrev64q_p16(poly16x8_t a) {
13430 return vrev64q_p16(a);
13431 }
13432
13433 // CHECK-LABEL: @test_vrev64q_f32(
13434 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
13435 // CHECK: ret <4 x float> [[SHUFFLE_I]]
test_vrev64q_f32(float32x4_t a)13436 float32x4_t test_vrev64q_f32(float32x4_t a) {
13437 return vrev64q_f32(a);
13438 }
13439
13440 // CHECK-LABEL: @test_vrhadd_s8(
13441 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
13442 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
test_vrhadd_s8(int8x8_t a,int8x8_t b)13443 int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
13444 return vrhadd_s8(a, b);
13445 }
13446
13447 // CHECK-LABEL: @test_vrhadd_s16(
13448 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13449 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13450 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
13451 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
13452 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
test_vrhadd_s16(int16x4_t a,int16x4_t b)13453 int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
13454 return vrhadd_s16(a, b);
13455 }
13456
13457 // CHECK-LABEL: @test_vrhadd_s32(
13458 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13459 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13460 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
13461 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
13462 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
test_vrhadd_s32(int32x2_t a,int32x2_t b)13463 int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
13464 return vrhadd_s32(a, b);
13465 }
13466
13467 // CHECK-LABEL: @test_vrhadd_u8(
13468 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
13469 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
test_vrhadd_u8(uint8x8_t a,uint8x8_t b)13470 uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
13471 return vrhadd_u8(a, b);
13472 }
13473
13474 // CHECK-LABEL: @test_vrhadd_u16(
13475 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13476 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13477 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
13478 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
13479 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
test_vrhadd_u16(uint16x4_t a,uint16x4_t b)13480 uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
13481 return vrhadd_u16(a, b);
13482 }
13483
13484 // CHECK-LABEL: @test_vrhadd_u32(
13485 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13486 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13487 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
13488 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
13489 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
test_vrhadd_u32(uint32x2_t a,uint32x2_t b)13490 uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
13491 return vrhadd_u32(a, b);
13492 }
13493
13494 // CHECK-LABEL: @test_vrhaddq_s8(
13495 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
13496 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_s8(int8x16_t a,int8x16_t b)13497 int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
13498 return vrhaddq_s8(a, b);
13499 }
13500
13501 // CHECK-LABEL: @test_vrhaddq_s16(
13502 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13503 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13504 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
13505 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
13506 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_s16(int16x8_t a,int16x8_t b)13507 int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
13508 return vrhaddq_s16(a, b);
13509 }
13510
13511 // CHECK-LABEL: @test_vrhaddq_s32(
13512 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13513 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13514 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
13515 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
13516 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_s32(int32x4_t a,int32x4_t b)13517 int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
13518 return vrhaddq_s32(a, b);
13519 }
13520
13521 // CHECK-LABEL: @test_vrhaddq_u8(
13522 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
13523 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_u8(uint8x16_t a,uint8x16_t b)13524 uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
13525 return vrhaddq_u8(a, b);
13526 }
13527
13528 // CHECK-LABEL: @test_vrhaddq_u16(
13529 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13530 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13531 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
13532 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
13533 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_u16(uint16x8_t a,uint16x8_t b)13534 uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
13535 return vrhaddq_u16(a, b);
13536 }
13537
13538 // CHECK-LABEL: @test_vrhaddq_u32(
13539 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13540 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13541 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
13542 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
13543 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_u32(uint32x4_t a,uint32x4_t b)13544 uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
13545 return vrhaddq_u32(a, b);
13546 }
13547
13548 // CHECK-LABEL: @test_vrshl_s8(
13549 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
13550 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
test_vrshl_s8(int8x8_t a,int8x8_t b)13551 int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
13552 return vrshl_s8(a, b);
13553 }
13554
13555 // CHECK-LABEL: @test_vrshl_s16(
13556 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13557 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13558 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
13559 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
13560 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
test_vrshl_s16(int16x4_t a,int16x4_t b)13561 int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
13562 return vrshl_s16(a, b);
13563 }
13564
13565 // CHECK-LABEL: @test_vrshl_s32(
13566 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13567 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13568 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
13569 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
13570 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
test_vrshl_s32(int32x2_t a,int32x2_t b)13571 int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
13572 return vrshl_s32(a, b);
13573 }
13574
13575 // CHECK-LABEL: @test_vrshl_s64(
13576 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13577 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13578 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
13579 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
13580 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
test_vrshl_s64(int64x1_t a,int64x1_t b)13581 int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
13582 return vrshl_s64(a, b);
13583 }
13584
13585 // CHECK-LABEL: @test_vrshl_u8(
13586 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
13587 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
test_vrshl_u8(uint8x8_t a,int8x8_t b)13588 uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
13589 return vrshl_u8(a, b);
13590 }
13591
13592 // CHECK-LABEL: @test_vrshl_u16(
13593 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13594 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13595 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
13596 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
13597 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
test_vrshl_u16(uint16x4_t a,int16x4_t b)13598 uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
13599 return vrshl_u16(a, b);
13600 }
13601
13602 // CHECK-LABEL: @test_vrshl_u32(
13603 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13604 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13605 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
13606 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
13607 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
test_vrshl_u32(uint32x2_t a,int32x2_t b)13608 uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
13609 return vrshl_u32(a, b);
13610 }
13611
13612 // CHECK-LABEL: @test_vrshl_u64(
13613 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13614 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13615 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
13616 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
13617 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
test_vrshl_u64(uint64x1_t a,int64x1_t b)13618 uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
13619 return vrshl_u64(a, b);
13620 }
13621
13622 // CHECK-LABEL: @test_vrshlq_s8(
13623 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
13624 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
test_vrshlq_s8(int8x16_t a,int8x16_t b)13625 int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
13626 return vrshlq_s8(a, b);
13627 }
13628
13629 // CHECK-LABEL: @test_vrshlq_s16(
13630 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13631 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13632 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
13633 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
13634 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
test_vrshlq_s16(int16x8_t a,int16x8_t b)13635 int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
13636 return vrshlq_s16(a, b);
13637 }
13638
13639 // CHECK-LABEL: @test_vrshlq_s32(
13640 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13641 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13642 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
13643 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
13644 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
test_vrshlq_s32(int32x4_t a,int32x4_t b)13645 int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
13646 return vrshlq_s32(a, b);
13647 }
13648
13649 // CHECK-LABEL: @test_vrshlq_s64(
13650 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13651 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
13652 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
13653 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
13654 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
test_vrshlq_s64(int64x2_t a,int64x2_t b)13655 int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
13656 return vrshlq_s64(a, b);
13657 }
13658
13659 // CHECK-LABEL: @test_vrshlq_u8(
13660 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
13661 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
test_vrshlq_u8(uint8x16_t a,int8x16_t b)13662 uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
13663 return vrshlq_u8(a, b);
13664 }
13665
13666 // CHECK-LABEL: @test_vrshlq_u16(
13667 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13668 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13669 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
13670 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
13671 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
test_vrshlq_u16(uint16x8_t a,int16x8_t b)13672 uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
13673 return vrshlq_u16(a, b);
13674 }
13675
13676 // CHECK-LABEL: @test_vrshlq_u32(
13677 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13678 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13679 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
13680 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
13681 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
test_vrshlq_u32(uint32x4_t a,int32x4_t b)13682 uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
13683 return vrshlq_u32(a, b);
13684 }
13685
13686 // CHECK-LABEL: @test_vrshlq_u64(
13687 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13688 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
13689 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
13690 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
13691 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
test_vrshlq_u64(uint64x2_t a,int64x2_t b)13692 uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
13693 return vrshlq_u64(a, b);
13694 }
13695
13696 // CHECK-LABEL: @test_vrshrn_n_s16(
13697 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13698 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13699 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
13700 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
test_vrshrn_n_s16(int16x8_t a)13701 int8x8_t test_vrshrn_n_s16(int16x8_t a) {
13702 return vrshrn_n_s16(a, 1);
13703 }
13704
13705 // CHECK-LABEL: @test_vrshrn_n_s32(
13706 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13707 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13708 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
13709 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
test_vrshrn_n_s32(int32x4_t a)13710 int16x4_t test_vrshrn_n_s32(int32x4_t a) {
13711 return vrshrn_n_s32(a, 1);
13712 }
13713
13714 // CHECK-LABEL: @test_vrshrn_n_s64(
13715 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13716 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13717 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
13718 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
test_vrshrn_n_s64(int64x2_t a)13719 int32x2_t test_vrshrn_n_s64(int64x2_t a) {
13720 return vrshrn_n_s64(a, 1);
13721 }
13722
13723 // CHECK-LABEL: @test_vrshrn_n_u16(
13724 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13725 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13726 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
13727 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
test_vrshrn_n_u16(uint16x8_t a)13728 uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
13729 return vrshrn_n_u16(a, 1);
13730 }
13731
13732 // CHECK-LABEL: @test_vrshrn_n_u32(
13733 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13734 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13735 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
13736 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
test_vrshrn_n_u32(uint32x4_t a)13737 uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
13738 return vrshrn_n_u32(a, 1);
13739 }
13740
13741 // CHECK-LABEL: @test_vrshrn_n_u64(
13742 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13743 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13744 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
13745 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
test_vrshrn_n_u64(uint64x2_t a)13746 uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
13747 return vrshrn_n_u64(a, 1);
13748 }
13749
// vrshr_n tests (64-bit d-registers): rounding shift right by constant.
// Signed variants lower to @llvm.arm.neon.vrshifts.*, unsigned variants to
// @llvm.arm.neon.vrshiftu.*, each with an all--1 splat shift vector
// (i.e. shift right by 1). Code below is byte-identical generated output.
13750 // CHECK-LABEL: @test_vrshr_n_s8(
13751 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13752 // CHECK:   ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_s8(int8x8_t a)13753 int8x8_t test_vrshr_n_s8(int8x8_t a) {
13754   return vrshr_n_s8(a, 1);
13755 }
13756 
13757 // CHECK-LABEL: @test_vrshr_n_s16(
13758 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13759 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
13760 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
13761 // CHECK:   ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_s16(int16x4_t a)13762 int16x4_t test_vrshr_n_s16(int16x4_t a) {
13763   return vrshr_n_s16(a, 1);
13764 }
13765 
13766 // CHECK-LABEL: @test_vrshr_n_s32(
13767 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13768 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
13769 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
13770 // CHECK:   ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_s32(int32x2_t a)13771 int32x2_t test_vrshr_n_s32(int32x2_t a) {
13772   return vrshr_n_s32(a, 1);
13773 }
13774 
13775 // CHECK-LABEL: @test_vrshr_n_s64(
13776 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13777 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13778 // CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
13779 // CHECK:   ret <1 x i64> [[VRSHR_N1]]
test_vrshr_n_s64(int64x1_t a)13780 int64x1_t test_vrshr_n_s64(int64x1_t a) {
13781   return vrshr_n_s64(a, 1);
13782 }
13783 
13784 // CHECK-LABEL: @test_vrshr_n_u8(
13785 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13786 // CHECK:   ret <8 x i8> [[VRSHR_N]]
test_vrshr_n_u8(uint8x8_t a)13787 uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
13788   return vrshr_n_u8(a, 1);
13789 }
13790 
13791 // CHECK-LABEL: @test_vrshr_n_u16(
13792 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13793 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
13794 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
13795 // CHECK:   ret <4 x i16> [[VRSHR_N1]]
test_vrshr_n_u16(uint16x4_t a)13796 uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
13797   return vrshr_n_u16(a, 1);
13798 }
13799 
13800 // CHECK-LABEL: @test_vrshr_n_u32(
13801 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13802 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
13803 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
13804 // CHECK:   ret <2 x i32> [[VRSHR_N1]]
test_vrshr_n_u32(uint32x2_t a)13805 uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
13806   return vrshr_n_u32(a, 1);
13807 }
13808 
13809 // CHECK-LABEL: @test_vrshr_n_u64(
13810 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13811 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13812 // CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
13813 // CHECK:   ret <1 x i64> [[VRSHR_N1]]
test_vrshr_n_u64(uint64x1_t a)13814 uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
13815   return vrshr_n_u64(a, 1);
13816 }
13817
// vrshrq_n tests (128-bit q-registers): same contract as the vrshr_n group
// above, but on 16-byte vectors (v16i8/v8i16/v4i32/v2i64 intrinsic forms).
13818 // CHECK-LABEL: @test_vrshrq_n_s8(
13819 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13820 // CHECK:   ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_s8(int8x16_t a)13821 int8x16_t test_vrshrq_n_s8(int8x16_t a) {
13822   return vrshrq_n_s8(a, 1);
13823 }
13824 
13825 // CHECK-LABEL: @test_vrshrq_n_s16(
13826 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13827 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13828 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
13829 // CHECK:   ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_s16(int16x8_t a)13830 int16x8_t test_vrshrq_n_s16(int16x8_t a) {
13831   return vrshrq_n_s16(a, 1);
13832 }
13833 
13834 // CHECK-LABEL: @test_vrshrq_n_s32(
13835 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13836 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13837 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
13838 // CHECK:   ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_s32(int32x4_t a)13839 int32x4_t test_vrshrq_n_s32(int32x4_t a) {
13840   return vrshrq_n_s32(a, 1);
13841 }
13842 
13843 // CHECK-LABEL: @test_vrshrq_n_s64(
13844 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13845 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13846 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
13847 // CHECK:   ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_s64(int64x2_t a)13848 int64x2_t test_vrshrq_n_s64(int64x2_t a) {
13849   return vrshrq_n_s64(a, 1);
13850 }
13851 
13852 // CHECK-LABEL: @test_vrshrq_n_u8(
13853 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13854 // CHECK:   ret <16 x i8> [[VRSHR_N]]
test_vrshrq_n_u8(uint8x16_t a)13855 uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
13856   return vrshrq_n_u8(a, 1);
13857 }
13858 
13859 // CHECK-LABEL: @test_vrshrq_n_u16(
13860 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13861 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13862 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
13863 // CHECK:   ret <8 x i16> [[VRSHR_N1]]
test_vrshrq_n_u16(uint16x8_t a)13864 uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
13865   return vrshrq_n_u16(a, 1);
13866 }
13867 
13868 // CHECK-LABEL: @test_vrshrq_n_u32(
13869 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13870 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13871 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
13872 // CHECK:   ret <4 x i32> [[VRSHR_N1]]
test_vrshrq_n_u32(uint32x4_t a)13873 uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
13874   return vrshrq_n_u32(a, 1);
13875 }
13876 
13877 // CHECK-LABEL: @test_vrshrq_n_u64(
13878 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13879 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13880 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
13881 // CHECK:   ret <2 x i64> [[VRSHR_N1]]
test_vrshrq_n_u64(uint64x2_t a)13882 uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
13883   return vrshrq_n_u64(a, 1);
13884 }
13885
// vrsqrte / vrsqrts tests: reciprocal square root estimate (unary) and
// Newton-Raphson step (binary), float and u32 element variants, lowered to
// @llvm.arm.neon.vrsqrte.* / @llvm.arm.neon.vrsqrts.*.
13886 // CHECK-LABEL: @test_vrsqrte_f32(
13887 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
13888 // CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a)
13889 // CHECK:   ret <2 x float> [[VRSQRTE_V1_I]]
test_vrsqrte_f32(float32x2_t a)13890 float32x2_t test_vrsqrte_f32(float32x2_t a) {
13891   return vrsqrte_f32(a);
13892 }
13893 
13894 // CHECK-LABEL: @test_vrsqrte_u32(
13895 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13896 // CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
13897 // CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
test_vrsqrte_u32(uint32x2_t a)13898 uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
13899   return vrsqrte_u32(a);
13900 }
13901 
13902 // CHECK-LABEL: @test_vrsqrteq_f32(
13903 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
13904 // CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a)
13905 // CHECK:   ret <4 x float> [[VRSQRTEQ_V1_I]]
test_vrsqrteq_f32(float32x4_t a)13906 float32x4_t test_vrsqrteq_f32(float32x4_t a) {
13907   return vrsqrteq_f32(a);
13908 }
13909 
13910 // CHECK-LABEL: @test_vrsqrteq_u32(
13911 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13912 // CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
13913 // CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
test_vrsqrteq_u32(uint32x4_t a)13914 uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
13915   return vrsqrteq_u32(a);
13916 }
13917 
13918 // CHECK-LABEL: @test_vrsqrts_f32(
13919 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
13920 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
13921 // CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b)
13922 // CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
13923 // CHECK:   ret <2 x float> [[VRSQRTS_V2_I]]
test_vrsqrts_f32(float32x2_t a,float32x2_t b)13924 float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
13925   return vrsqrts_f32(a, b);
13926 }
13927 
13928 // CHECK-LABEL: @test_vrsqrtsq_f32(
13929 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
13930 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
13931 // CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b)
13932 // CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
13933 // CHECK:   ret <4 x float> [[VRSQRTSQ_V2_I]]
test_vrsqrtsq_f32(float32x4_t a,float32x4_t b)13934 float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
13935   return vrsqrtsq_f32(a, b);
13936 }
13937
// vrsra_n tests (d-registers): rounding shift right by constant and
// accumulate -- expected IR is a vrshifts/vrshiftu call on %b (shift vector
// splatted to -1, i.e. shift right by 1) followed by an add with %a.
13938 // CHECK-LABEL: @test_vrsra_n_s8(
13939 // CHECK:   [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13940 // CHECK:   [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
13941 // CHECK:   ret <8 x i8> [[VRSRA_N]]
test_vrsra_n_s8(int8x8_t a,int8x8_t b)13942 int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
13943   return vrsra_n_s8(a, b, 1);
13944 }
13945 
13946 // CHECK-LABEL: @test_vrsra_n_s16(
13947 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13948 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13949 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
13950 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
13951 // CHECK:   [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
13952 // CHECK:   [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
13953 // CHECK:   ret <4 x i16> [[VRSRA_N]]
test_vrsra_n_s16(int16x4_t a,int16x4_t b)13954 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
13955   return vrsra_n_s16(a, b, 1);
13956 }
13957 
13958 // CHECK-LABEL: @test_vrsra_n_s32(
13959 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13960 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
13961 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
13962 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
13963 // CHECK:   [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
13964 // CHECK:   [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
13965 // CHECK:   ret <2 x i32> [[VRSRA_N]]
test_vrsra_n_s32(int32x2_t a,int32x2_t b)13966 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
13967   return vrsra_n_s32(a, b, 1);
13968 }
13969 
13970 // CHECK-LABEL: @test_vrsra_n_s64(
13971 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13972 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13973 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13974 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13975 // CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
13976 // CHECK:   [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
13977 // CHECK:   ret <1 x i64> [[VRSRA_N]]
test_vrsra_n_s64(int64x1_t a,int64x1_t b)13978 int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
13979   return vrsra_n_s64(a, b, 1);
13980 }
13981 
13982 // CHECK-LABEL: @test_vrsra_n_u8(
13983 // CHECK:   [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13984 // CHECK:   [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
13985 // CHECK:   ret <8 x i8> [[VRSRA_N]]
test_vrsra_n_u8(uint8x8_t a,uint8x8_t b)13986 uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
13987   return vrsra_n_u8(a, b, 1);
13988 }
13989 
13990 // CHECK-LABEL: @test_vrsra_n_u16(
13991 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13992 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
13993 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
13994 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
13995 // CHECK:   [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
13996 // CHECK:   [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
13997 // CHECK:   ret <4 x i16> [[VRSRA_N]]
test_vrsra_n_u16(uint16x4_t a,uint16x4_t b)13998 uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
13999   return vrsra_n_u16(a, b, 1);
14000 }
14001 
14002 // CHECK-LABEL: @test_vrsra_n_u32(
14003 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14004 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14005 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14006 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
14007 // CHECK:   [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
14008 // CHECK:   [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
14009 // CHECK:   ret <2 x i32> [[VRSRA_N]]
test_vrsra_n_u32(uint32x2_t a,uint32x2_t b)14010 uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
14011   return vrsra_n_u32(a, b, 1);
14012 }
14013 
14014 // CHECK-LABEL: @test_vrsra_n_u64(
14015 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14016 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14017 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14018 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14019 // CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
14020 // CHECK:   [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
14021 // CHECK:   ret <1 x i64> [[VRSRA_N]]
test_vrsra_n_u64(uint64x1_t a,uint64x1_t b)14022 uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
14023   return vrsra_n_u64(a, b, 1);
14024 }
14025
// vrsraq_n tests (q-registers): 128-bit counterpart of the vrsra_n group --
// rounding shift right of %b by 1 (splatted -1 shift vector) then add to %a.
14026 // CHECK-LABEL: @test_vrsraq_n_s8(
14027 // CHECK:   [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
14028 // CHECK:   [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
14029 // CHECK:   ret <16 x i8> [[VRSRA_N]]
test_vrsraq_n_s8(int8x16_t a,int8x16_t b)14030 int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
14031   return vrsraq_n_s8(a, b, 1);
14032 }
14033 
14034 // CHECK-LABEL: @test_vrsraq_n_s16(
14035 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14036 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14037 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14038 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14039 // CHECK:   [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
14040 // CHECK:   [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
14041 // CHECK:   ret <8 x i16> [[VRSRA_N]]
test_vrsraq_n_s16(int16x8_t a,int16x8_t b)14042 int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
14043   return vrsraq_n_s16(a, b, 1);
14044 }
14045 
14046 // CHECK-LABEL: @test_vrsraq_n_s32(
14047 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14048 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14049 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14050 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
14051 // CHECK:   [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
14052 // CHECK:   [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
14053 // CHECK:   ret <4 x i32> [[VRSRA_N]]
test_vrsraq_n_s32(int32x4_t a,int32x4_t b)14054 int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
14055   return vrsraq_n_s32(a, b, 1);
14056 }
14057 
14058 // CHECK-LABEL: @test_vrsraq_n_s64(
14059 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14060 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14061 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14062 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
14063 // CHECK:   [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
14064 // CHECK:   [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
14065 // CHECK:   ret <2 x i64> [[VRSRA_N]]
test_vrsraq_n_s64(int64x2_t a,int64x2_t b)14066 int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
14067   return vrsraq_n_s64(a, b, 1);
14068 }
14069 
14070 // CHECK-LABEL: @test_vrsraq_n_u8(
14071 // CHECK:   [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
14072 // CHECK:   [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
14073 // CHECK:   ret <16 x i8> [[VRSRA_N]]
test_vrsraq_n_u8(uint8x16_t a,uint8x16_t b)14074 uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
14075   return vrsraq_n_u8(a, b, 1);
14076 }
14077 
14078 // CHECK-LABEL: @test_vrsraq_n_u16(
14079 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14080 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14081 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14082 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14083 // CHECK:   [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
14084 // CHECK:   [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
14085 // CHECK:   ret <8 x i16> [[VRSRA_N]]
test_vrsraq_n_u16(uint16x8_t a,uint16x8_t b)14086 uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
14087   return vrsraq_n_u16(a, b, 1);
14088 }
14089 
14090 // CHECK-LABEL: @test_vrsraq_n_u32(
14091 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14092 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14093 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14094 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
14095 // CHECK:   [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
14096 // CHECK:   [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
14097 // CHECK:   ret <4 x i32> [[VRSRA_N]]
test_vrsraq_n_u32(uint32x4_t a,uint32x4_t b)14098 uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
14099   return vrsraq_n_u32(a, b, 1);
14100 }
14101 
14102 // CHECK-LABEL: @test_vrsraq_n_u64(
14103 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14104 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14105 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14106 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
14107 // CHECK:   [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
14108 // CHECK:   [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
14109 // CHECK:   ret <2 x i64> [[VRSRA_N]]
test_vrsraq_n_u64(uint64x2_t a,uint64x2_t b)14110 uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
14111   return vrsraq_n_u64(a, b, 1);
14112 }
14113
// vrsubhn tests: rounding subtract and narrow to high half, lowered to
// @llvm.arm.neon.vrsubhn.*; signed and unsigned intrinsics share the same
// underlying IR intrinsic (only the C-level element types differ).
14114 // CHECK-LABEL: @test_vrsubhn_s16(
14115 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14116 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14117 // CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
14118 // CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
test_vrsubhn_s16(int16x8_t a,int16x8_t b)14119 int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
14120   return vrsubhn_s16(a, b);
14121 }
14122 
14123 // CHECK-LABEL: @test_vrsubhn_s32(
14124 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14125 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14126 // CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
14127 // CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
14128 // CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
test_vrsubhn_s32(int32x4_t a,int32x4_t b)14129 int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
14130   return vrsubhn_s32(a, b);
14131 }
14132 
14133 // CHECK-LABEL: @test_vrsubhn_s64(
14134 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14135 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14136 // CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
14137 // CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
14138 // CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
test_vrsubhn_s64(int64x2_t a,int64x2_t b)14139 int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
14140   return vrsubhn_s64(a, b);
14141 }
14142 
14143 // CHECK-LABEL: @test_vrsubhn_u16(
14144 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14145 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14146 // CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
14147 // CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
test_vrsubhn_u16(uint16x8_t a,uint16x8_t b)14148 uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
14149   return vrsubhn_u16(a, b);
14150 }
14151 
14152 // CHECK-LABEL: @test_vrsubhn_u32(
14153 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14154 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14155 // CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
14156 // CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
14157 // CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
test_vrsubhn_u32(uint32x4_t a,uint32x4_t b)14158 uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
14159   return vrsubhn_u32(a, b);
14160 }
14161 
14162 // CHECK-LABEL: @test_vrsubhn_u64(
14163 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14164 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14165 // CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
14166 // CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
14167 // CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
test_vrsubhn_u64(uint64x2_t a,uint64x2_t b)14168 uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
14169   return vrsubhn_u64(a, b);
14170 }
14171
// vset_lane tests (d-registers): each intrinsic must lower to a single IR
// insertelement at the highest valid lane index. The f16 variant goes
// through alloca/bitcast scratch slots because half lanes are set via i16
// reinterpretation in arm_neon.h (hence the pointer parameter in the test).
14172 // CHECK-LABEL: @test_vset_lane_u8(
14173 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
14174 // CHECK:   ret <8 x i8> [[VSET_LANE]]
test_vset_lane_u8(uint8_t a,uint8x8_t b)14175 uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
14176   return vset_lane_u8(a, b, 7);
14177 }
14178 
14179 // CHECK-LABEL: @test_vset_lane_u16(
14180 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
14181 // CHECK:   ret <4 x i16> [[VSET_LANE]]
test_vset_lane_u16(uint16_t a,uint16x4_t b)14182 uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
14183   return vset_lane_u16(a, b, 3);
14184 }
14185 
14186 // CHECK-LABEL: @test_vset_lane_u32(
14187 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
14188 // CHECK:   ret <2 x i32> [[VSET_LANE]]
test_vset_lane_u32(uint32_t a,uint32x2_t b)14189 uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
14190   return vset_lane_u32(a, b, 1);
14191 }
14192 
14193 // CHECK-LABEL: @test_vset_lane_s8(
14194 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
14195 // CHECK:   ret <8 x i8> [[VSET_LANE]]
test_vset_lane_s8(int8_t a,int8x8_t b)14196 int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
14197   return vset_lane_s8(a, b, 7);
14198 }
14199 
14200 // CHECK-LABEL: @test_vset_lane_s16(
14201 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
14202 // CHECK:   ret <4 x i16> [[VSET_LANE]]
test_vset_lane_s16(int16_t a,int16x4_t b)14203 int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
14204   return vset_lane_s16(a, b, 3);
14205 }
14206 
14207 // CHECK-LABEL: @test_vset_lane_s32(
14208 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
14209 // CHECK:   ret <2 x i32> [[VSET_LANE]]
test_vset_lane_s32(int32_t a,int32x2_t b)14210 int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
14211   return vset_lane_s32(a, b, 1);
14212 }
14213 
14214 // CHECK-LABEL: @test_vset_lane_p8(
14215 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
14216 // CHECK:   ret <8 x i8> [[VSET_LANE]]
test_vset_lane_p8(poly8_t a,poly8x8_t b)14217 poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
14218   return vset_lane_p8(a, b, 7);
14219 }
14220 
14221 // CHECK-LABEL: @test_vset_lane_p16(
14222 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
14223 // CHECK:   ret <4 x i16> [[VSET_LANE]]
test_vset_lane_p16(poly16_t a,poly16x4_t b)14224 poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
14225   return vset_lane_p16(a, b, 3);
14226 }
14227 
14228 // CHECK-LABEL: @test_vset_lane_f32(
14229 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
14230 // CHECK:   ret <2 x float> [[VSET_LANE]]
test_vset_lane_f32(float32_t a,float32x2_t b)14231 float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
14232   return vset_lane_f32(a, b, 1);
14233 }
14234 
14235 // CHECK-LABEL: @test_vset_lane_f16(
14236 // CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
14237 // CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
14238 // CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
14239 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
14240 // CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
14241 // CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
14242 // CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
14243 // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
14244 // CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
14245 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
14246 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1
14247 // CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
14248 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
14249 // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
14250 // CHECK:   ret <4 x half> [[TMP8]]
test_vset_lane_f16(float16_t * a,float16x4_t b)14251 float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
14252   return vset_lane_f16(*a, b, 1);
14253 }
14254
// vsetq_lane tests (q-registers): 128-bit counterpart of the vset_lane
// group -- a single insertelement per intrinsic; the f16 variant again
// reinterprets half<->i16 through stack slots.
14255 // CHECK-LABEL: @test_vsetq_lane_u8(
14256 // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
14257 // CHECK:   ret <16 x i8> [[VSET_LANE]]
test_vsetq_lane_u8(uint8_t a,uint8x16_t b)14258 uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
14259   return vsetq_lane_u8(a, b, 15);
14260 }
14261 
14262 // CHECK-LABEL: @test_vsetq_lane_u16(
14263 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
14264 // CHECK:   ret <8 x i16> [[VSET_LANE]]
test_vsetq_lane_u16(uint16_t a,uint16x8_t b)14265 uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
14266   return vsetq_lane_u16(a, b, 7);
14267 }
14268 
14269 // CHECK-LABEL: @test_vsetq_lane_u32(
14270 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
14271 // CHECK:   ret <4 x i32> [[VSET_LANE]]
test_vsetq_lane_u32(uint32_t a,uint32x4_t b)14272 uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
14273   return vsetq_lane_u32(a, b, 3);
14274 }
14275 
14276 // CHECK-LABEL: @test_vsetq_lane_s8(
14277 // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
14278 // CHECK:   ret <16 x i8> [[VSET_LANE]]
test_vsetq_lane_s8(int8_t a,int8x16_t b)14279 int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
14280   return vsetq_lane_s8(a, b, 15);
14281 }
14282 
14283 // CHECK-LABEL: @test_vsetq_lane_s16(
14284 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
14285 // CHECK:   ret <8 x i16> [[VSET_LANE]]
test_vsetq_lane_s16(int16_t a,int16x8_t b)14286 int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
14287   return vsetq_lane_s16(a, b, 7);
14288 }
14289 
14290 // CHECK-LABEL: @test_vsetq_lane_s32(
14291 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
14292 // CHECK:   ret <4 x i32> [[VSET_LANE]]
test_vsetq_lane_s32(int32_t a,int32x4_t b)14293 int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
14294   return vsetq_lane_s32(a, b, 3);
14295 }
14296 
14297 // CHECK-LABEL: @test_vsetq_lane_p8(
14298 // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
14299 // CHECK:   ret <16 x i8> [[VSET_LANE]]
test_vsetq_lane_p8(poly8_t a,poly8x16_t b)14300 poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
14301   return vsetq_lane_p8(a, b, 15);
14302 }
14303 
14304 // CHECK-LABEL: @test_vsetq_lane_p16(
14305 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
14306 // CHECK:   ret <8 x i16> [[VSET_LANE]]
test_vsetq_lane_p16(poly16_t a,poly16x8_t b)14307 poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
14308   return vsetq_lane_p16(a, b, 7);
14309 }
14310 
14311 // CHECK-LABEL: @test_vsetq_lane_f32(
14312 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
14313 // CHECK:   ret <4 x float> [[VSET_LANE]]
test_vsetq_lane_f32(float32_t a,float32x4_t b)14314 float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
14315   return vsetq_lane_f32(a, b, 3);
14316 }
14317 
14318 // CHECK-LABEL: @test_vsetq_lane_f16(
14319 // CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
14320 // CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
14321 // CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
14322 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
14323 // CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
14324 // CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
14325 // CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
14326 // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
14327 // CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
14328 // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
14329 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3
14330 // CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
14331 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
14332 // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
14333 // CHECK:   ret <8 x half> [[TMP8]]
test_vsetq_lane_f16(float16_t * a,float16x8_t b)14334 float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
14335   return vsetq_lane_f16(*a, b, 3);
14336 }
14337
// 64-bit-element lane-set tests: vset_lane on <1 x i64> necessarily uses
// lane 0; vsetq_lane on <2 x i64> uses lane 1.
14338 // CHECK-LABEL: @test_vset_lane_s64(
14339 // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
14340 // CHECK:   ret <1 x i64> [[VSET_LANE]]
test_vset_lane_s64(int64_t a,int64x1_t b)14341 int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
14342   return vset_lane_s64(a, b, 0);
14343 }
14344 
14345 // CHECK-LABEL: @test_vset_lane_u64(
14346 // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
14347 // CHECK:   ret <1 x i64> [[VSET_LANE]]
test_vset_lane_u64(uint64_t a,uint64x1_t b)14348 uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
14349   return vset_lane_u64(a, b, 0);
14350 }
14351 
14352 // CHECK-LABEL: @test_vsetq_lane_s64(
14353 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
14354 // CHECK:   ret <2 x i64> [[VSET_LANE]]
test_vsetq_lane_s64(int64_t a,int64x2_t b)14355 int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
14356   return vsetq_lane_s64(a, b, 1);
14357 }
14358 
14359 // CHECK-LABEL: @test_vsetq_lane_u64(
14360 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
14361 // CHECK:   ret <2 x i64> [[VSET_LANE]]
test_vsetq_lane_u64(uint64_t a,uint64x2_t b)14362 uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
14363   return vsetq_lane_u64(a, b, 1);
14364 }
14365
// vshl (shift by signed per-lane register amounts), 64-bit d-register forms.
// Signed variants map to @llvm.arm.neon.vshifts.*, unsigned to vshiftu.*;
// non-i8 element types go through <8 x i8> bitcasts of both operands.
// CHECK-LABEL: @test_vshl_s8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}
14439
// vshlq (shift by register), 128-bit q-register forms; same lowering pattern
// as the d-register cases but with <16 x i8>-width bitcasts.
// CHECK-LABEL: @test_vshlq_s8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
14513
// vshll_n (widening shift left by immediate): lowers to a sext (signed) or
// zext (unsigned) to the double-width vector followed by a plain IR shl by a
// splat constant, rather than a NEON intrinsic call.
// CHECK-LABEL: @test_vshll_n_s8(
// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 1);
}
14569
// vshl_n (shift left by immediate, 64-bit d-regs): lowers to a plain IR shl
// by a splat constant for both signed and unsigned element types.
// CHECK-LABEL: @test_vshl_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}
14637
// vshlq_n (shift left by immediate, 128-bit q-regs): same plain-shl lowering
// as vshl_n, with <16 x i8>-width bitcasts for non-i8 element types.
// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 1);
}
14705
// vshrn_n (narrowing shift right by immediate): lowers to an ashr (signed) or
// lshr (unsigned) by a splat constant followed by a trunc to the half-width
// vector — no NEON intrinsic call.
// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 1);
}
14765
// vshr_n (shift right by immediate, 64-bit d-regs): ashr for signed element
// types, lshr for unsigned, each by a splat constant.
// CHECK-LABEL: @test_vshr_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}
14833
// vshrq_n (shift right by immediate, 128-bit q-regs): same ashr/lshr lowering
// as vshr_n, with <16 x i8>-width bitcasts for non-i8 element types.
// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 1);
}
14901
14902 // CHECK-LABEL: @test_vsli_n_s8(
14903 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
14904 // CHECK: ret <8 x i8> [[VSLI_N]]
test_vsli_n_s8(int8x8_t a,int8x8_t b)14905 int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
14906 return vsli_n_s8(a, b, 1);
14907 }
14908
14909 // CHECK-LABEL: @test_vsli_n_s16(
14910 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14911 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14912 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14913 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14914 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
14915 // CHECK: ret <4 x i16> [[VSLI_N2]]
test_vsli_n_s16(int16x4_t a,int16x4_t b)14916 int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
14917 return vsli_n_s16(a, b, 1);
14918 }
14919
14920 // CHECK-LABEL: @test_vsli_n_s32(
14921 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14922 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14923 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14924 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
14925 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
14926 // CHECK: ret <2 x i32> [[VSLI_N2]]
test_vsli_n_s32(int32x2_t a,int32x2_t b)14927 int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
14928 return vsli_n_s32(a, b, 1);
14929 }
14930
14931 // CHECK-LABEL: @test_vsli_n_s64(
14932 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14933 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14934 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14935 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14936 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
14937 // CHECK: ret <1 x i64> [[VSLI_N2]]
test_vsli_n_s64(int64x1_t a,int64x1_t b)14938 int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
14939 return vsli_n_s64(a, b, 1);
14940 }
14941
14942 // CHECK-LABEL: @test_vsli_n_u8(
14943 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
14944 // CHECK: ret <8 x i8> [[VSLI_N]]
test_vsli_n_u8(uint8x8_t a,uint8x8_t b)14945 uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
14946 return vsli_n_u8(a, b, 1);
14947 }
14948
14949 // CHECK-LABEL: @test_vsli_n_u16(
14950 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14951 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14952 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14953 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14954 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
14955 // CHECK: ret <4 x i16> [[VSLI_N2]]
test_vsli_n_u16(uint16x4_t a,uint16x4_t b)14956 uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
14957 return vsli_n_u16(a, b, 1);
14958 }
14959
14960 // CHECK-LABEL: @test_vsli_n_u32(
14961 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14962 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14963 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14964 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
14965 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
14966 // CHECK: ret <2 x i32> [[VSLI_N2]]
test_vsli_n_u32(uint32x2_t a,uint32x2_t b)14967 uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
14968 return vsli_n_u32(a, b, 1);
14969 }
14970
14971 // CHECK-LABEL: @test_vsli_n_u64(
14972 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14973 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14974 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14975 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14976 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
14977 // CHECK: ret <1 x i64> [[VSLI_N2]]
test_vsli_n_u64(uint64x1_t a,uint64x1_t b)14978 uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
14979 return vsli_n_u64(a, b, 1);
14980 }
14981
14982 // CHECK-LABEL: @test_vsli_n_p8(
14983 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
14984 // CHECK: ret <8 x i8> [[VSLI_N]]
test_vsli_n_p8(poly8x8_t a,poly8x8_t b)14985 poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
14986 return vsli_n_p8(a, b, 1);
14987 }
14988
14989 // CHECK-LABEL: @test_vsli_n_p16(
14990 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14991 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14992 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14993 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14994 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
14995 // CHECK: ret <4 x i16> [[VSLI_N2]]
// Checks IR lowering of vsli_n_p16 (shift-left-insert, shift count 1).
test_vsli_n_p16(poly16x4_t a,poly16x4_t b)14996 poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
14997 return vsli_n_p16(a, b, 1);
14998 }
14999
15000 // CHECK-LABEL: @test_vsliq_n_s8(
15001 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
15002 // CHECK: ret <16 x i8> [[VSLI_N]]
// Checks IR lowering of vsliq_n_s8 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_s8(int8x16_t a,int8x16_t b)15003 int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
15004 return vsliq_n_s8(a, b, 1);
15005 }
15006
15007 // CHECK-LABEL: @test_vsliq_n_s16(
15008 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15009 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15010 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15011 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15012 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
15013 // CHECK: ret <8 x i16> [[VSLI_N2]]
// Checks IR lowering of vsliq_n_s16 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_s16(int16x8_t a,int16x8_t b)15014 int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
15015 return vsliq_n_s16(a, b, 1);
15016 }
15017
15018 // CHECK-LABEL: @test_vsliq_n_s32(
15019 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15020 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15021 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
15022 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15023 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
15024 // CHECK: ret <4 x i32> [[VSLI_N2]]
// Checks IR lowering of vsliq_n_s32 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_s32(int32x4_t a,int32x4_t b)15025 int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
15026 return vsliq_n_s32(a, b, 1);
15027 }
15028
15029 // CHECK-LABEL: @test_vsliq_n_s64(
15030 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15031 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15032 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15033 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15034 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
15035 // CHECK: ret <2 x i64> [[VSLI_N2]]
// Checks IR lowering of vsliq_n_s64 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_s64(int64x2_t a,int64x2_t b)15036 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
15037 return vsliq_n_s64(a, b, 1);
15038 }
15039
15040 // CHECK-LABEL: @test_vsliq_n_u8(
15041 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
15042 // CHECK: ret <16 x i8> [[VSLI_N]]
// Checks IR lowering of vsliq_n_u8 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_u8(uint8x16_t a,uint8x16_t b)15043 uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
15044 return vsliq_n_u8(a, b, 1);
15045 }
15046
15047 // CHECK-LABEL: @test_vsliq_n_u16(
15048 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15049 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15050 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15051 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15052 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
15053 // CHECK: ret <8 x i16> [[VSLI_N2]]
// Checks IR lowering of vsliq_n_u16 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_u16(uint16x8_t a,uint16x8_t b)15054 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
15055 return vsliq_n_u16(a, b, 1);
15056 }
15057
15058 // CHECK-LABEL: @test_vsliq_n_u32(
15059 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15060 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15061 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
15062 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15063 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
15064 // CHECK: ret <4 x i32> [[VSLI_N2]]
// Checks IR lowering of vsliq_n_u32 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_u32(uint32x4_t a,uint32x4_t b)15065 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
15066 return vsliq_n_u32(a, b, 1);
15067 }
15068
15069 // CHECK-LABEL: @test_vsliq_n_u64(
15070 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15071 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15072 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15073 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15074 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
15075 // CHECK: ret <2 x i64> [[VSLI_N2]]
// Checks IR lowering of vsliq_n_u64 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_u64(uint64x2_t a,uint64x2_t b)15076 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
15077 return vsliq_n_u64(a, b, 1);
15078 }
15079
15080 // CHECK-LABEL: @test_vsliq_n_p8(
15081 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
15082 // CHECK: ret <16 x i8> [[VSLI_N]]
// Checks IR lowering of vsliq_n_p8 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_p8(poly8x16_t a,poly8x16_t b)15083 poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
15084 return vsliq_n_p8(a, b, 1);
15085 }
15086
15087 // CHECK-LABEL: @test_vsliq_n_p16(
15088 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15089 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15090 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15091 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15092 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
15093 // CHECK: ret <8 x i16> [[VSLI_N2]]
// Checks IR lowering of vsliq_n_p16 (128-bit shift-left-insert, shift count 1).
test_vsliq_n_p16(poly16x8_t a,poly16x8_t b)15094 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
15095 return vsliq_n_p16(a, b, 1);
15096 }
15097
15098 // CHECK-LABEL: @test_vsra_n_s8(
15099 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
15100 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
15101 // CHECK: ret <8 x i8> [[TMP0]]
// Checks IR lowering of vsra_n_s8 (signed shift-right accumulate: ashr then add).
test_vsra_n_s8(int8x8_t a,int8x8_t b)15102 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
15103 return vsra_n_s8(a, b, 1);
15104 }
15105
15106 // CHECK-LABEL: @test_vsra_n_s16(
15107 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15108 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15109 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
15110 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15111 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
15112 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
15113 // CHECK: ret <4 x i16> [[TMP4]]
// Checks IR lowering of vsra_n_s16 (signed shift-right accumulate: ashr then add).
test_vsra_n_s16(int16x4_t a,int16x4_t b)15114 int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
15115 return vsra_n_s16(a, b, 1);
15116 }
15117
15118 // CHECK-LABEL: @test_vsra_n_s32(
15119 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15120 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15121 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
15122 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15123 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
15124 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
15125 // CHECK: ret <2 x i32> [[TMP4]]
// Checks IR lowering of vsra_n_s32 (signed shift-right accumulate: ashr then add).
test_vsra_n_s32(int32x2_t a,int32x2_t b)15126 int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
15127 return vsra_n_s32(a, b, 1);
15128 }
15129
15130 // CHECK-LABEL: @test_vsra_n_s64(
15131 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15132 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15133 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
15134 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15135 // CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
15136 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
15137 // CHECK: ret <1 x i64> [[TMP4]]
// Checks IR lowering of vsra_n_s64 (signed shift-right accumulate: ashr then add).
test_vsra_n_s64(int64x1_t a,int64x1_t b)15138 int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
15139 return vsra_n_s64(a, b, 1);
15140 }
15141
15142 // CHECK-LABEL: @test_vsra_n_u8(
15143 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
15144 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
15145 // CHECK: ret <8 x i8> [[TMP0]]
// Checks IR lowering of vsra_n_u8 (unsigned shift-right accumulate: lshr then add).
test_vsra_n_u8(uint8x8_t a,uint8x8_t b)15146 uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
15147 return vsra_n_u8(a, b, 1);
15148 }
15149
15150 // CHECK-LABEL: @test_vsra_n_u16(
15151 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15152 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15153 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
15154 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15155 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
15156 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
15157 // CHECK: ret <4 x i16> [[TMP4]]
// Checks IR lowering of vsra_n_u16 (unsigned shift-right accumulate: lshr then add).
test_vsra_n_u16(uint16x4_t a,uint16x4_t b)15158 uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
15159 return vsra_n_u16(a, b, 1);
15160 }
15161
15162 // CHECK-LABEL: @test_vsra_n_u32(
15163 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15164 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15165 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
15166 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15167 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
15168 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
15169 // CHECK: ret <2 x i32> [[TMP4]]
// Checks IR lowering of vsra_n_u32 (unsigned shift-right accumulate: lshr then add).
test_vsra_n_u32(uint32x2_t a,uint32x2_t b)15170 uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
15171 return vsra_n_u32(a, b, 1);
15172 }
15173
15174 // CHECK-LABEL: @test_vsra_n_u64(
15175 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15176 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15177 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
15178 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15179 // CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
15180 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
15181 // CHECK: ret <1 x i64> [[TMP4]]
// Checks IR lowering of vsra_n_u64 (unsigned shift-right accumulate: lshr then add).
test_vsra_n_u64(uint64x1_t a,uint64x1_t b)15182 uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
15183 return vsra_n_u64(a, b, 1);
15184 }
15185
15186 // CHECK-LABEL: @test_vsraq_n_s8(
15187 // CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
15188 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
15189 // CHECK: ret <16 x i8> [[TMP0]]
// Checks IR lowering of vsraq_n_s8 (128-bit signed shift-right accumulate).
test_vsraq_n_s8(int8x16_t a,int8x16_t b)15190 int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
15191 return vsraq_n_s8(a, b, 1);
15192 }
15193
15194 // CHECK-LABEL: @test_vsraq_n_s16(
15195 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15196 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15197 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15198 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15199 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
15200 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
15201 // CHECK: ret <8 x i16> [[TMP4]]
// Checks IR lowering of vsraq_n_s16 (128-bit signed shift-right accumulate).
test_vsraq_n_s16(int16x8_t a,int16x8_t b)15202 int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
15203 return vsraq_n_s16(a, b, 1);
15204 }
15205
15206 // CHECK-LABEL: @test_vsraq_n_s32(
15207 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15208 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15209 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
15210 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15211 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
15212 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
15213 // CHECK: ret <4 x i32> [[TMP4]]
// Checks IR lowering of vsraq_n_s32 (128-bit signed shift-right accumulate).
test_vsraq_n_s32(int32x4_t a,int32x4_t b)15214 int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
15215 return vsraq_n_s32(a, b, 1);
15216 }
15217
15218 // CHECK-LABEL: @test_vsraq_n_s64(
15219 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15220 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15221 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15222 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15223 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
15224 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
15225 // CHECK: ret <2 x i64> [[TMP4]]
// Checks IR lowering of vsraq_n_s64 (128-bit signed shift-right accumulate).
test_vsraq_n_s64(int64x2_t a,int64x2_t b)15226 int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
15227 return vsraq_n_s64(a, b, 1);
15228 }
15229
15230 // CHECK-LABEL: @test_vsraq_n_u8(
15231 // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
15232 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
15233 // CHECK: ret <16 x i8> [[TMP0]]
// Checks IR lowering of vsraq_n_u8 (128-bit unsigned shift-right accumulate).
test_vsraq_n_u8(uint8x16_t a,uint8x16_t b)15234 uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
15235 return vsraq_n_u8(a, b, 1);
15236 }
15237
15238 // CHECK-LABEL: @test_vsraq_n_u16(
15239 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15240 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15241 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15242 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15243 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
15244 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
15245 // CHECK: ret <8 x i16> [[TMP4]]
// Checks IR lowering of vsraq_n_u16 (128-bit unsigned shift-right accumulate).
test_vsraq_n_u16(uint16x8_t a,uint16x8_t b)15246 uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
15247 return vsraq_n_u16(a, b, 1);
15248 }
15249
15250 // CHECK-LABEL: @test_vsraq_n_u32(
15251 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15252 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15253 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
15254 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15255 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
15256 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
15257 // CHECK: ret <4 x i32> [[TMP4]]
// Checks IR lowering of vsraq_n_u32 (128-bit unsigned shift-right accumulate).
test_vsraq_n_u32(uint32x4_t a,uint32x4_t b)15258 uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
15259 return vsraq_n_u32(a, b, 1);
15260 }
15261
15262 // CHECK-LABEL: @test_vsraq_n_u64(
15263 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15264 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15265 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15266 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15267 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
15268 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
15269 // CHECK: ret <2 x i64> [[TMP4]]
// Checks IR lowering of vsraq_n_u64 (128-bit unsigned shift-right accumulate).
test_vsraq_n_u64(uint64x2_t a,uint64x2_t b)15270 uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
15271 return vsraq_n_u64(a, b, 1);
15272 }
15273
15274 // CHECK-LABEL: @test_vsri_n_s8(
15275 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15276 // CHECK: ret <8 x i8> [[VSLI_N]]
// Checks IR lowering of vsri_n_s8 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_s8(int8x8_t a,int8x8_t b)15277 int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
15278 return vsri_n_s8(a, b, 1);
15279 }
15280
15281 // CHECK-LABEL: @test_vsri_n_s16(
15282 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15283 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15284 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
15285 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15286 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
15287 // CHECK: ret <4 x i16> [[VSLI_N2]]
// Checks IR lowering of vsri_n_s16 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_s16(int16x4_t a,int16x4_t b)15288 int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
15289 return vsri_n_s16(a, b, 1);
15290 }
15291
15292 // CHECK-LABEL: @test_vsri_n_s32(
15293 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15295 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
15296 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15297 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
15298 // CHECK: ret <2 x i32> [[VSLI_N2]]
// Checks IR lowering of vsri_n_s32 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_s32(int32x2_t a,int32x2_t b)15299 int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
15300 return vsri_n_s32(a, b, 1);
15301 }
15302
15303 // CHECK-LABEL: @test_vsri_n_s64(
15304 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15305 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15306 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
15307 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15308 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
15309 // CHECK: ret <1 x i64> [[VSLI_N2]]
// Checks IR lowering of vsri_n_s64 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_s64(int64x1_t a,int64x1_t b)15310 int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
15311 return vsri_n_s64(a, b, 1);
15312 }
15313
15314 // CHECK-LABEL: @test_vsri_n_u8(
15315 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15316 // CHECK: ret <8 x i8> [[VSLI_N]]
// Checks IR lowering of vsri_n_u8 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_u8(uint8x8_t a,uint8x8_t b)15317 uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
15318 return vsri_n_u8(a, b, 1);
15319 }
15320
15321 // CHECK-LABEL: @test_vsri_n_u16(
15322 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15323 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15324 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
15325 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15326 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
15327 // CHECK: ret <4 x i16> [[VSLI_N2]]
// Checks IR lowering of vsri_n_u16 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_u16(uint16x4_t a,uint16x4_t b)15328 uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
15329 return vsri_n_u16(a, b, 1);
15330 }
15331
15332 // CHECK-LABEL: @test_vsri_n_u32(
15333 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15334 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15335 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
15336 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15337 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
15338 // CHECK: ret <2 x i32> [[VSLI_N2]]
// Checks IR lowering of vsri_n_u32 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_u32(uint32x2_t a,uint32x2_t b)15339 uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
15340 return vsri_n_u32(a, b, 1);
15341 }
15342
15343 // CHECK-LABEL: @test_vsri_n_u64(
15344 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15345 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15346 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
15347 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15348 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
15349 // CHECK: ret <1 x i64> [[VSLI_N2]]
// Checks IR lowering of vsri_n_u64 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_u64(uint64x1_t a,uint64x1_t b)15350 uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
15351 return vsri_n_u64(a, b, 1);
15352 }
15353
15354 // CHECK-LABEL: @test_vsri_n_p8(
15355 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15356 // CHECK: ret <8 x i8> [[VSLI_N]]
// Checks IR lowering of vsri_n_p8 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_p8(poly8x8_t a,poly8x8_t b)15357 poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
15358 return vsri_n_p8(a, b, 1);
15359 }
15360
15361 // CHECK-LABEL: @test_vsri_n_p16(
15362 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15363 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15364 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
15365 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15366 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
15367 // CHECK: ret <4 x i16> [[VSLI_N2]]
// Checks IR lowering of vsri_n_p16 (shift-right-insert; encoded as vshiftins with negative count).
test_vsri_n_p16(poly16x4_t a,poly16x4_t b)15368 poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
15369 return vsri_n_p16(a, b, 1);
15370 }
15371
15372 // CHECK-LABEL: @test_vsriq_n_s8(
15373 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15374 // CHECK: ret <16 x i8> [[VSLI_N]]
// Checks IR lowering of vsriq_n_s8 (128-bit shift-right-insert).
test_vsriq_n_s8(int8x16_t a,int8x16_t b)15375 int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
15376 return vsriq_n_s8(a, b, 1);
15377 }
15378
15379 // CHECK-LABEL: @test_vsriq_n_s16(
15380 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15381 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15382 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15383 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15384 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
15385 // CHECK: ret <8 x i16> [[VSLI_N2]]
// Checks IR lowering of vsriq_n_s16 (128-bit shift-right-insert).
test_vsriq_n_s16(int16x8_t a,int16x8_t b)15386 int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
15387 return vsriq_n_s16(a, b, 1);
15388 }
15389
15390 // CHECK-LABEL: @test_vsriq_n_s32(
15391 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15392 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15393 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
15394 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15395 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
15396 // CHECK: ret <4 x i32> [[VSLI_N2]]
// Checks IR lowering of vsriq_n_s32 (128-bit shift-right-insert).
test_vsriq_n_s32(int32x4_t a,int32x4_t b)15397 int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
15398 return vsriq_n_s32(a, b, 1);
15399 }
15400
15401 // CHECK-LABEL: @test_vsriq_n_s64(
15402 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15403 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15404 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15405 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15406 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
15407 // CHECK: ret <2 x i64> [[VSLI_N2]]
// Checks IR lowering of vsriq_n_s64 (128-bit shift-right-insert).
test_vsriq_n_s64(int64x2_t a,int64x2_t b)15408 int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
15409 return vsriq_n_s64(a, b, 1);
15410 }
15411
15412 // CHECK-LABEL: @test_vsriq_n_u8(
15413 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15414 // CHECK: ret <16 x i8> [[VSLI_N]]
// Checks IR lowering of vsriq_n_u8 (128-bit shift-right-insert).
test_vsriq_n_u8(uint8x16_t a,uint8x16_t b)15415 uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
15416 return vsriq_n_u8(a, b, 1);
15417 }
15418
15419 // CHECK-LABEL: @test_vsriq_n_u16(
15420 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15421 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15422 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15423 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15424 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
15425 // CHECK: ret <8 x i16> [[VSLI_N2]]
// Checks IR lowering of vsriq_n_u16 (128-bit shift-right-insert).
test_vsriq_n_u16(uint16x8_t a,uint16x8_t b)15426 uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
15427 return vsriq_n_u16(a, b, 1);
15428 }
15429
15430 // CHECK-LABEL: @test_vsriq_n_u32(
15431 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15432 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15433 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
15434 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15435 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
15436 // CHECK: ret <4 x i32> [[VSLI_N2]]
// Checks IR lowering of vsriq_n_u32 (128-bit shift-right-insert).
test_vsriq_n_u32(uint32x4_t a,uint32x4_t b)15437 uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
15438 return vsriq_n_u32(a, b, 1);
15439 }
15440
15441 // CHECK-LABEL: @test_vsriq_n_u64(
15442 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15443 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15444 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15445 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15446 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
15447 // CHECK: ret <2 x i64> [[VSLI_N2]]
// Checks IR lowering of vsriq_n_u64 (128-bit shift-right-insert).
test_vsriq_n_u64(uint64x2_t a,uint64x2_t b)15448 uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
15449 return vsriq_n_u64(a, b, 1);
15450 }
15451
15452 // CHECK-LABEL: @test_vsriq_n_p8(
15453 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15454 // CHECK: ret <16 x i8> [[VSLI_N]]
// Checks IR lowering of vsriq_n_p8 (128-bit shift-right-insert).
test_vsriq_n_p8(poly8x16_t a,poly8x16_t b)15455 poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
15456 return vsriq_n_p8(a, b, 1);
15457 }
15458
15459 // CHECK-LABEL: @test_vsriq_n_p16(
15460 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15461 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15462 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15463 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15464 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
15465 // CHECK: ret <8 x i16> [[VSLI_N2]]
// Checks IR lowering of vsriq_n_p16 (128-bit shift-right-insert).
test_vsriq_n_p16(poly16x8_t a,poly16x8_t b)15466 poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
15467 return vsriq_n_p16(a, b, 1);
15468 }
15469
15470 // CHECK-LABEL: @test_vst1q_u8(
15471 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
15472 // CHECK: ret void
// Checks IR lowering of the vst1q_u8 store intrinsic.
test_vst1q_u8(uint8_t * a,uint8x16_t b)15473 void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
15474 vst1q_u8(a, b);
15475 }
15476
15477 // CHECK-LABEL: @test_vst1q_u16(
15478 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15479 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15480 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15481 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
15482 // CHECK: ret void
// Checks IR lowering of the vst1q_u16 store intrinsic.
test_vst1q_u16(uint16_t * a,uint16x8_t b)15483 void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
15484 vst1q_u16(a, b);
15485 }
15486
15487 // CHECK-LABEL: @test_vst1q_u32(
15488 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15489 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15490 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15491 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
15492 // CHECK: ret void
// Checks IR lowering of the vst1q_u32 store intrinsic.
test_vst1q_u32(uint32_t * a,uint32x4_t b)15493 void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
15494 vst1q_u32(a, b);
15495 }
15496
15497 // CHECK-LABEL: @test_vst1q_u64(
15498 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15499 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15500 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15501 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
15502 // CHECK: ret void
// Checks IR lowering of the vst1q_u64 store intrinsic.
test_vst1q_u64(uint64_t * a,uint64x2_t b)15503 void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
15504 vst1q_u64(a, b);
15505 }
15506
15507 // CHECK-LABEL: @test_vst1q_s8(
15508 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
15509 // CHECK: ret void
// Checks IR lowering of the vst1q_s8 store intrinsic.
test_vst1q_s8(int8_t * a,int8x16_t b)15510 void test_vst1q_s8(int8_t * a, int8x16_t b) {
15511 vst1q_s8(a, b);
15512 }
15513
15514 // CHECK-LABEL: @test_vst1q_s16(
15515 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15516 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15517 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15518 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
15519 // CHECK: ret void
// Checks IR lowering of the vst1q_s16 store intrinsic.
test_vst1q_s16(int16_t * a,int16x8_t b)15520 void test_vst1q_s16(int16_t * a, int16x8_t b) {
15521 vst1q_s16(a, b);
15522 }
15523
15524 // CHECK-LABEL: @test_vst1q_s32(
15525 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15526 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15527 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15528 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
15529 // CHECK: ret void
// Checks IR lowering of the vst1q_s32 store intrinsic.
test_vst1q_s32(int32_t * a,int32x4_t b)15530 void test_vst1q_s32(int32_t * a, int32x4_t b) {
15531 vst1q_s32(a, b);
15532 }
15533
15534 // CHECK-LABEL: @test_vst1q_s64(
15535 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15536 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15537 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15538 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
15539 // CHECK: ret void
// Checks IR lowering of the vst1q_s64 store intrinsic.
test_vst1q_s64(int64_t * a,int64x2_t b)15540 void test_vst1q_s64(int64_t * a, int64x2_t b) {
15541 vst1q_s64(a, b);
15542 }
15543
15544 // CHECK-LABEL: @test_vst1q_f16(
15545 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
15546 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
15547 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
15548 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* [[TMP0]], <8 x half> [[TMP2]], i32 2)
15549 // CHECK: ret void
// Checks IR lowering of the vst1q_f16 store intrinsic.
test_vst1q_f16(float16_t * a,float16x8_t b)15550 void test_vst1q_f16(float16_t * a, float16x8_t b) {
15551 vst1q_f16(a, b);
15552 }
15553
15554 // CHECK-LABEL: @test_vst1q_f32(
15555 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
15556 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
15557 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
15558 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* [[TMP0]], <4 x float> [[TMP2]], i32 4)
15559 // CHECK: ret void
// Verifies vst1q_f32 lowers to @llvm.arm.neon.vst1.p0i8.v4f32 with align 4 (see CHECK above).
void test_vst1q_f32(float32_t * a, float32x4_t b) {
  vst1q_f32(a, b);
}
15563
15564 // CHECK-LABEL: @test_vst1q_p8(
15565 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
15566 // CHECK: ret void
// Verifies vst1q_p8 lowers directly to @llvm.arm.neon.vst1.p0i8.v16i8 with no bitcasts (see CHECK above).
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
  vst1q_p8(a, b);
}
15570
15571 // CHECK-LABEL: @test_vst1q_p16(
15572 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15573 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15574 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15575 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
15576 // CHECK: ret void
// Verifies vst1q_p16 lowers to @llvm.arm.neon.vst1.p0i8.v8i16 with align 2 (see CHECK above).
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
  vst1q_p16(a, b);
}
15580
15581 // CHECK-LABEL: @test_vst1_u8(
15582 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
15583 // CHECK: ret void
// Verifies vst1_u8 lowers directly to @llvm.arm.neon.vst1.p0i8.v8i8 with no bitcasts (see CHECK above).
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
  vst1_u8(a, b);
}
15587
15588 // CHECK-LABEL: @test_vst1_u16(
15589 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15590 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15591 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15592 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
15593 // CHECK: ret void
// Verifies vst1_u16 lowers to @llvm.arm.neon.vst1.p0i8.v4i16 with align 2 (see CHECK above).
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
  vst1_u16(a, b);
}
15597
15598 // CHECK-LABEL: @test_vst1_u32(
15599 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15600 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15601 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15602 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
15603 // CHECK: ret void
// Verifies vst1_u32 lowers to @llvm.arm.neon.vst1.p0i8.v2i32 with align 4 (see CHECK above).
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
  vst1_u32(a, b);
}
15607
15608 // CHECK-LABEL: @test_vst1_u64(
15609 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15610 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15611 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15612 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
15613 // CHECK: ret void
// Verifies vst1_u64 lowers to @llvm.arm.neon.vst1.p0i8.v1i64 with align 4 (see CHECK above).
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
  vst1_u64(a, b);
}
15617
15618 // CHECK-LABEL: @test_vst1_s8(
15619 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
15620 // CHECK: ret void
// Verifies vst1_s8 lowers directly to @llvm.arm.neon.vst1.p0i8.v8i8 with no bitcasts (see CHECK above).
void test_vst1_s8(int8_t * a, int8x8_t b) {
  vst1_s8(a, b);
}
15624
15625 // CHECK-LABEL: @test_vst1_s16(
15626 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15627 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15628 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15629 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
15630 // CHECK: ret void
// Verifies vst1_s16 lowers to @llvm.arm.neon.vst1.p0i8.v4i16 with align 2 (see CHECK above).
void test_vst1_s16(int16_t * a, int16x4_t b) {
  vst1_s16(a, b);
}
15634
15635 // CHECK-LABEL: @test_vst1_s32(
15636 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15637 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15638 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15639 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
15640 // CHECK: ret void
// Verifies vst1_s32 lowers to @llvm.arm.neon.vst1.p0i8.v2i32 with align 4 (see CHECK above).
void test_vst1_s32(int32_t * a, int32x2_t b) {
  vst1_s32(a, b);
}
15644
15645 // CHECK-LABEL: @test_vst1_s64(
15646 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15647 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15648 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15649 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
15650 // CHECK: ret void
// Verifies vst1_s64 lowers to @llvm.arm.neon.vst1.p0i8.v1i64 with align 4 (see CHECK above).
void test_vst1_s64(int64_t * a, int64x1_t b) {
  vst1_s64(a, b);
}
15654
15655 // CHECK-LABEL: @test_vst1_f16(
15656 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
15657 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
15658 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
15659 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* [[TMP0]], <4 x half> [[TMP2]], i32 2)
15660 // CHECK: ret void
// Verifies vst1_f16 lowers to @llvm.arm.neon.vst1.p0i8.v4f16 with align 2 (see CHECK above).
void test_vst1_f16(float16_t * a, float16x4_t b) {
  vst1_f16(a, b);
}
15664
15665 // CHECK-LABEL: @test_vst1_f32(
15666 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
15667 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
15668 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
15669 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* [[TMP0]], <2 x float> [[TMP2]], i32 4)
15670 // CHECK: ret void
// Verifies vst1_f32 lowers to @llvm.arm.neon.vst1.p0i8.v2f32 with align 4 (see CHECK above).
void test_vst1_f32(float32_t * a, float32x2_t b) {
  vst1_f32(a, b);
}
15674
15675 // CHECK-LABEL: @test_vst1_p8(
15676 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
15677 // CHECK: ret void
// Verifies vst1_p8 lowers directly to @llvm.arm.neon.vst1.p0i8.v8i8 with no bitcasts (see CHECK above).
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
  vst1_p8(a, b);
}
15681
15682 // CHECK-LABEL: @test_vst1_p16(
15683 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15684 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15685 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15686 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
15687 // CHECK: ret void
// Verifies vst1_p16 lowers to @llvm.arm.neon.vst1.p0i8.v4i16 with align 2 (see CHECK above).
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
  vst1_p16(a, b);
}
15691
15692 // CHECK-LABEL: @test_vst1q_lane_u8(
15693 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
15694 // CHECK: store i8 [[TMP0]], i8* %a, align 1
15695 // CHECK: ret void
// Verifies vst1q_lane_u8 of lane 15 lowers to extractelement + plain i8 store (see CHECK above).
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
  vst1q_lane_u8(a, b, 15);
}
15699
15700 // CHECK-LABEL: @test_vst1q_lane_u16(
15701 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15702 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15703 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15704 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
15705 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
15706 // CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
15707 // CHECK: ret void
// Verifies vst1q_lane_u16 of lane 7 lowers to extractelement + i16 store, align 2 (see CHECK above).
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
  vst1q_lane_u16(a, b, 7);
}
15711
15712 // CHECK-LABEL: @test_vst1q_lane_u32(
15713 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15714 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15715 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15716 // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
15717 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
15718 // CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
15719 // CHECK: ret void
// Verifies vst1q_lane_u32 of lane 3 lowers to extractelement + i32 store, align 4 (see CHECK above).
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
  vst1q_lane_u32(a, b, 3);
}
15723
15724 // CHECK-LABEL: @test_vst1q_lane_u64(
15725 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15726 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15727 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15728 // CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
15729 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
15730 // CHECK: ret void
// Verifies vst1q_lane_u64 of lane 1 lowers via shufflevector to a <1 x i64> vst1 intrinsic,
// not a scalar store (see CHECK above).
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
  vst1q_lane_u64(a, b, 1);
}
15734
15735 // CHECK-LABEL: @test_vst1q_lane_s8(
15736 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
15737 // CHECK: store i8 [[TMP0]], i8* %a, align 1
15738 // CHECK: ret void
// Verifies vst1q_lane_s8 of lane 15 lowers to extractelement + plain i8 store (see CHECK above).
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
  vst1q_lane_s8(a, b, 15);
}
15742
15743 // CHECK-LABEL: @test_vst1q_lane_s16(
15744 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15745 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15746 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15747 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
15748 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
15749 // CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
15750 // CHECK: ret void
// Verifies vst1q_lane_s16 of lane 7 lowers to extractelement + i16 store, align 2 (see CHECK above).
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
  vst1q_lane_s16(a, b, 7);
}
15754
15755 // CHECK-LABEL: @test_vst1q_lane_s32(
15756 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15757 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15758 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15759 // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
15760 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
15761 // CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
15762 // CHECK: ret void
// Verifies vst1q_lane_s32 of lane 3 lowers to extractelement + i32 store, align 4 (see CHECK above).
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
  vst1q_lane_s32(a, b, 3);
}
15766
15767 // CHECK-LABEL: @test_vst1q_lane_s64(
15768 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15769 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15770 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15771 // CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
15772 // CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
15773 // CHECK: ret void
// Verifies vst1q_lane_s64 of lane 1 lowers via shufflevector to a <1 x i64> vst1 intrinsic,
// not a scalar store (see CHECK above).
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
  vst1q_lane_s64(a, b, 1);
}
15777
15778 // CHECK-LABEL: @test_vst1q_lane_f16(
15779 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
15780 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
15781 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
15782 // CHECK: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
15783 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
15784 // CHECK: store half [[TMP3]], half* [[TMP4]], align 2
15785 // CHECK: ret void
// Verifies vst1q_lane_f16 of lane 7 lowers to extractelement + half store, align 2 (see CHECK above).
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
  vst1q_lane_f16(a, b, 7);
}
15789
15790 // CHECK-LABEL: @test_vst1q_lane_f32(
15791 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
15792 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
15793 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
15794 // CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
15795 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
15796 // CHECK: store float [[TMP3]], float* [[TMP4]], align 4
15797 // CHECK: ret void
// Verifies vst1q_lane_f32 of lane 3 lowers to extractelement + float store, align 4 (see CHECK above).
void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
  vst1q_lane_f32(a, b, 3);
}
15801
15802 // CHECK-LABEL: @test_vst1q_lane_p8(
15803 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
15804 // CHECK: store i8 [[TMP0]], i8* %a, align 1
15805 // CHECK: ret void
// Verifies vst1q_lane_p8 of lane 15 lowers to extractelement + plain i8 store (see CHECK above).
void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
  vst1q_lane_p8(a, b, 15);
}
15809
15810 // CHECK-LABEL: @test_vst1q_lane_p16(
15811 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15812 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15813 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15814 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
15815 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
15816 // CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
15817 // CHECK: ret void
// Verifies vst1q_lane_p16 of lane 7 lowers to extractelement + i16 store, align 2 (see CHECK above).
void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
  vst1q_lane_p16(a, b, 7);
}
15821
15822 // CHECK-LABEL: @test_vst1_lane_u8(
15823 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
15824 // CHECK: store i8 [[TMP0]], i8* %a, align 1
15825 // CHECK: ret void
// Verifies vst1_lane_u8 of lane 7 lowers to extractelement + plain i8 store (see CHECK above).
void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
  vst1_lane_u8(a, b, 7);
}
15829
15830 // CHECK-LABEL: @test_vst1_lane_u16(
15831 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15832 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15833 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15834 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
15835 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
15836 // CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
15837 // CHECK: ret void
// Verifies vst1_lane_u16 of lane 3 lowers to extractelement + i16 store, align 2 (see CHECK above).
void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
  vst1_lane_u16(a, b, 3);
}
15841
15842 // CHECK-LABEL: @test_vst1_lane_u32(
15843 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15844 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15845 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15846 // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
15847 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
15848 // CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
15849 // CHECK: ret void
// Verifies vst1_lane_u32 of lane 1 lowers to extractelement + i32 store, align 4 (see CHECK above).
void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
  vst1_lane_u32(a, b, 1);
}
15853
15854 // CHECK-LABEL: @test_vst1_lane_u64(
15855 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15856 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15857 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15858 // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
15859 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
15860 // CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
15861 // CHECK: ret void
// Verifies vst1_lane_u64 of lane 0 lowers to extractelement + i64 store, align 4 (see CHECK above).
void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
  vst1_lane_u64(a, b, 0);
}
15865
15866 // CHECK-LABEL: @test_vst1_lane_s8(
15867 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
15868 // CHECK: store i8 [[TMP0]], i8* %a, align 1
15869 // CHECK: ret void
// Verifies vst1_lane_s8 of lane 7 lowers to extractelement + plain i8 store (see CHECK above).
void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
  vst1_lane_s8(a, b, 7);
}
15873
15874 // CHECK-LABEL: @test_vst1_lane_s16(
15875 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15876 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15877 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15878 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
15879 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
15880 // CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
15881 // CHECK: ret void
// Verifies vst1_lane_s16 of lane 3 lowers to extractelement + i16 store, align 2 (see CHECK above).
void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
  vst1_lane_s16(a, b, 3);
}
15885
15886 // CHECK-LABEL: @test_vst1_lane_s32(
15887 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
15888 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15889 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15890 // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
15891 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
15892 // CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
15893 // CHECK: ret void
// Verifies vst1_lane_s32 of lane 1 lowers to extractelement + i32 store, align 4 (see CHECK above).
void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
  vst1_lane_s32(a, b, 1);
}
15897
15898 // CHECK-LABEL: @test_vst1_lane_s64(
15899 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
15900 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15901 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15902 // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
15903 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
15904 // CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
15905 // CHECK: ret void
// Verifies vst1_lane_s64 of lane 0 lowers to extractelement + i64 store, align 4 (see CHECK above).
void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
  vst1_lane_s64(a, b, 0);
}
15909
15910 // CHECK-LABEL: @test_vst1_lane_f16(
15911 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
15912 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
15913 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
15914 // CHECK: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
15915 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
15916 // CHECK: store half [[TMP3]], half* [[TMP4]], align 2
15917 // CHECK: ret void
// Verifies vst1_lane_f16 of lane 3 lowers to extractelement + half store, align 2 (see CHECK above).
void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
  vst1_lane_f16(a, b, 3);
}
15921
15922 // CHECK-LABEL: @test_vst1_lane_f32(
15923 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
15924 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
15925 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
15926 // CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
15927 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
15928 // CHECK: store float [[TMP3]], float* [[TMP4]], align 4
15929 // CHECK: ret void
// Verifies vst1_lane_f32 of lane 1 lowers to extractelement + float store, align 4 (see CHECK above).
void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
  vst1_lane_f32(a, b, 1);
}
15933
15934 // CHECK-LABEL: @test_vst1_lane_p8(
15935 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
15936 // CHECK: store i8 [[TMP0]], i8* %a, align 1
15937 // CHECK: ret void
// Verifies vst1_lane_p8 of lane 7 lowers to extractelement + plain i8 store (see CHECK above).
void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
  vst1_lane_p8(a, b, 7);
}
15941
15942 // CHECK-LABEL: @test_vst1_lane_p16(
15943 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
15944 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15945 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15946 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
15947 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
15948 // CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
15949 // CHECK: ret void
// Verifies vst1_lane_p16 of lane 3 lowers to extractelement + i16 store, align 2 (see CHECK above).
void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
  vst1_lane_p16(a, b, 3);
}
15953
15954 // CHECK-LABEL: @test_vst2q_u8(
15955 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
15956 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
15957 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
15958 // CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
15959 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
15960 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
15961 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
15962 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
15963 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
15964 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
15965 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
15966 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
15967 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
15968 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
15969 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
15970 // CHECK: ret void
// Verifies vst2q_u8: the 2-vector struct arg is coerced through [4 x i64]/memcpy, then both
// <16 x i8> registers feed @llvm.arm.neon.vst2.p0i8.v16i8 with align 1 (see CHECK above).
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}
15974
15975 // CHECK-LABEL: @test_vst2q_u16(
15976 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
15977 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
15978 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
15979 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
15980 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
15981 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
15982 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
15983 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
15984 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
15985 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
15986 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
15987 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
15988 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
15989 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
15990 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
15991 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
15992 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
15993 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
15994 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
15995 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
15996 // CHECK: ret void
// Verifies vst2q_u16: struct arg coerced via [4 x i64]/memcpy, both <8 x i16> registers feed
// @llvm.arm.neon.vst2.p0i8.v8i16 with align 2 (see CHECK above).
void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}
16000
16001 // CHECK-LABEL: @test_vst2q_u32(
16002 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
16003 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
16004 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
16005 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
16006 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16007 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
16008 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
16009 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16010 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16011 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
16012 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
16013 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16014 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16015 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
16016 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16017 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16018 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16019 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16020 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16021 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
16022 // CHECK: ret void
// Verifies vst2q_u32: struct arg coerced via [4 x i64]/memcpy, both <4 x i32> registers feed
// @llvm.arm.neon.vst2.p0i8.v4i32 with align 4 (see CHECK above).
void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}
16026
16027 // CHECK-LABEL: @test_vst2q_s8(
16028 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
16029 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
16030 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
16031 // CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
16032 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16033 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
16034 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
16035 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16036 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
16037 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
16038 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16039 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
16040 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
16041 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16042 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
16043 // CHECK: ret void
// Verifies vst2q_s8: struct arg coerced via [4 x i64]/memcpy, both <16 x i8> registers feed
// @llvm.arm.neon.vst2.p0i8.v16i8 with align 1 (see CHECK above).
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
  vst2q_s8(a, b);
}
16047
16048 // CHECK-LABEL: @test_vst2q_s16(
16049 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
16050 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
16051 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
16052 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16053 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16054 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
16055 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
16056 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16057 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16058 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
16059 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16060 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16061 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16062 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
16063 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16064 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16065 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16066 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16067 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16068 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
16069 // CHECK: ret void
// Verifies vst2q_s16: struct arg coerced via [4 x i64]/memcpy, both <8 x i16> registers feed
// @llvm.arm.neon.vst2.p0i8.v8i16 with align 2 (see CHECK above).
void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
  vst2q_s16(a, b);
}
16073
16074 // CHECK-LABEL: @test_vst2q_s32(
16075 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
16076 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
16077 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
16078 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
16079 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16080 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
16081 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
16082 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16083 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16084 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16085 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
16086 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16087 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16088 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16089 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16090 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16091 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16092 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16093 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16094 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
16095 // CHECK: ret void
// Verifies vst2q_s32: struct arg coerced via [4 x i64]/memcpy, both <4 x i32> registers feed
// @llvm.arm.neon.vst2.p0i8.v4i32 with align 4 (see CHECK above).
void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
  vst2q_s32(a, b);
}
16099
16100 // CHECK-LABEL: @test_vst2q_f16(
16101 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
16102 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
16103 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
16104 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
16105 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16106 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
16107 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
16108 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16109 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16110 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16111 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
16112 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
16113 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
16114 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16115 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
16116 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
16117 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
16118 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
16119 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
16120 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 2)
16121 // CHECK: ret void
test_vst2q_f16(float16_t * a,float16x8x2_t b)16122 void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
16123 vst2q_f16(a, b);
16124 }
16125
16126 // CHECK-LABEL: @test_vst2q_f32(
16127 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
16128 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
16129 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
16130 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
16131 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16132 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
16133 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
16134 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16135 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16136 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16137 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
16138 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
16139 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
16140 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16141 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
16142 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
16143 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
16144 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
16145 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
16146 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
16147 // CHECK: ret void
test_vst2q_f32(float32_t * a,float32x4x2_t b)16148 void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
16149 vst2q_f32(a, b);
16150 }
16151
16152 // CHECK-LABEL: @test_vst2q_p8(
16153 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
16154 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
16155 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
16156 // CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
16157 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16158 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
16159 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
16160 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16161 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
16162 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
16163 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16164 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
16165 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
16166 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16167 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
16168 // CHECK: ret void
test_vst2q_p8(poly8_t * a,poly8x16x2_t b)16169 void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
16170 vst2q_p8(a, b);
16171 }
16172
16173 // CHECK-LABEL: @test_vst2q_p16(
16174 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
16175 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
16176 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
16177 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16178 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16179 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
16180 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
16181 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16182 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16183 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16184 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16185 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16186 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16187 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16188 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16189 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16190 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16191 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16192 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16193 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
16194 // CHECK: ret void
test_vst2q_p16(poly16_t * a,poly16x8x2_t b)16195 void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) {
16196 vst2q_p16(a, b);
16197 }
16198
16199 // CHECK-LABEL: @test_vst2_u8(
16200 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
16201 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
16202 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
16203 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16204 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16205 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
16206 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
16207 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16208 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16209 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16210 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16211 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16212 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16213 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16214 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16215 // CHECK: ret void
test_vst2_u8(uint8_t * a,uint8x8x2_t b)16216 void test_vst2_u8(uint8_t * a, uint8x8x2_t b) {
16217 vst2_u8(a, b);
16218 }
16219
16220 // CHECK-LABEL: @test_vst2_u16(
16221 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
16222 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
16223 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
16224 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16225 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16226 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
16227 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
16228 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16229 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16230 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16231 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16232 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16233 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16234 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16235 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16236 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16237 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16238 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16239 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16240 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16241 // CHECK: ret void
test_vst2_u16(uint16_t * a,uint16x4x2_t b)16242 void test_vst2_u16(uint16_t * a, uint16x4x2_t b) {
16243 vst2_u16(a, b);
16244 }
16245
16246 // CHECK-LABEL: @test_vst2_u32(
16247 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
16248 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
16249 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
16250 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16251 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16252 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
16253 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
16254 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16255 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16256 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16257 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16258 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16259 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16260 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16261 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16262 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16263 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16264 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16265 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16266 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16267 // CHECK: ret void
test_vst2_u32(uint32_t * a,uint32x2x2_t b)16268 void test_vst2_u32(uint32_t * a, uint32x2x2_t b) {
16269 vst2_u32(a, b);
16270 }
16271
16272 // CHECK-LABEL: @test_vst2_u64(
16273 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
16274 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
16275 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
16276 // CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16277 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16278 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
16279 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
16280 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16281 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
16282 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16283 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16284 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16285 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16286 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16287 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16288 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16289 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16290 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16291 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16292 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16293 // CHECK: ret void
test_vst2_u64(uint64_t * a,uint64x1x2_t b)16294 void test_vst2_u64(uint64_t * a, uint64x1x2_t b) {
16295 vst2_u64(a, b);
16296 }
16297
16298 // CHECK-LABEL: @test_vst2_s8(
16299 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
16300 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
16301 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
16302 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16303 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16304 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
16305 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
16306 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16307 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16308 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16309 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16310 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16311 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16312 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16313 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16314 // CHECK: ret void
test_vst2_s8(int8_t * a,int8x8x2_t b)16315 void test_vst2_s8(int8_t * a, int8x8x2_t b) {
16316 vst2_s8(a, b);
16317 }
16318
16319 // CHECK-LABEL: @test_vst2_s16(
16320 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
16321 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
16322 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
16323 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16324 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16325 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
16326 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
16327 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16328 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16329 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16330 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16331 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16332 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16333 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16334 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16335 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16336 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16337 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16338 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16339 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16340 // CHECK: ret void
test_vst2_s16(int16_t * a,int16x4x2_t b)16341 void test_vst2_s16(int16_t * a, int16x4x2_t b) {
16342 vst2_s16(a, b);
16343 }
16344
16345 // CHECK-LABEL: @test_vst2_s32(
16346 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
16347 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
16348 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
16349 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16350 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16351 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
16352 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
16353 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16354 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16355 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16356 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16357 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16358 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16359 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16360 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16361 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16362 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16363 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16364 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16365 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16366 // CHECK: ret void
test_vst2_s32(int32_t * a,int32x2x2_t b)16367 void test_vst2_s32(int32_t * a, int32x2x2_t b) {
16368 vst2_s32(a, b);
16369 }
16370
16371 // CHECK-LABEL: @test_vst2_s64(
16372 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
16373 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
16374 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
16375 // CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16376 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16377 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
16378 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
16379 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16380 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
16381 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16382 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16383 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16384 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16385 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16386 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16387 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16388 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16389 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16390 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16391 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16392 // CHECK: ret void
test_vst2_s64(int64_t * a,int64x1x2_t b)16393 void test_vst2_s64(int64_t * a, int64x1x2_t b) {
16394 vst2_s64(a, b);
16395 }
16396
16397 // CHECK-LABEL: @test_vst2_f16(
16398 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
16399 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
16400 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
16401 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
16402 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16403 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
16404 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
16405 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16406 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16407 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16408 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
16409 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16410 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
16411 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16412 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
16413 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16414 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
16415 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
16416 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
16417 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 2)
16418 // CHECK: ret void
test_vst2_f16(float16_t * a,float16x4x2_t b)16419 void test_vst2_f16(float16_t * a, float16x4x2_t b) {
16420 vst2_f16(a, b);
16421 }
16422
16423 // CHECK-LABEL: @test_vst2_f32(
16424 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
16425 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
16426 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
16427 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
16428 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16429 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
16430 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
16431 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16432 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16433 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16434 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
16435 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16436 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
16437 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16438 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
16439 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16440 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
16441 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
16442 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
16443 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
16444 // CHECK: ret void
test_vst2_f32(float32_t * a,float32x2x2_t b)16445 void test_vst2_f32(float32_t * a, float32x2x2_t b) {
16446 vst2_f32(a, b);
16447 }
16448
16449 // CHECK-LABEL: @test_vst2_p8(
16450 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
16451 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
16452 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
16453 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16454 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16455 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
16456 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
16457 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16458 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16459 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16460 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16461 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16462 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16463 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16464 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16465 // CHECK: ret void
test_vst2_p8(poly8_t * a,poly8x8x2_t b)16466 void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
16467 vst2_p8(a, b);
16468 }
16469
16470 // CHECK-LABEL: @test_vst2_p16(
16471 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
16472 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
16473 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
16474 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16475 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16476 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
16477 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
16478 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16479 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16480 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16481 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16482 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16483 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16484 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16485 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16486 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16487 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16488 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16489 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16490 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16491 // CHECK: ret void
test_vst2_p16(poly16_t * a,poly16x4x2_t b)16492 void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
16493 vst2_p16(a, b);
16494 }
16495
16496 // CHECK-LABEL: @test_vst2q_lane_u16(
16497 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
16498 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
16499 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
16500 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16501 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16502 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
16503 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
16504 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16505 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16506 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
16507 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16508 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16509 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16510 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
16511 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16512 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16513 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16514 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16515 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16516 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16517 // CHECK: ret void
test_vst2q_lane_u16(uint16_t * a,uint16x8x2_t b)16518 void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
16519 vst2q_lane_u16(a, b, 7);
16520 }
16521
16522 // CHECK-LABEL: @test_vst2q_lane_u32(
16523 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
16524 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
16525 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
16526 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
16527 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16528 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
16529 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
16530 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16531 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16532 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
16533 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
16534 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16535 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16536 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
16537 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16538 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16539 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16540 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16541 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16542 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
16543 // CHECK: ret void
// Verifies vst2q_lane_u32 with lane 3 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i32 (lane 3, alignment 4); see CHECK lines above.
void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_lane_u32(a, b, 3);
}
16547
16548 // CHECK-LABEL: @test_vst2q_lane_s16(
16549 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
16550 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
16551 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
16552 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16553 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16554 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
16555 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
16556 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16557 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16558 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
16559 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16560 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16561 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16562 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
16563 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16564 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16565 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16566 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16567 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16568 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16569 // CHECK: ret void
// Verifies vst2q_lane_s16 with lane 7 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i16 (lane 7, alignment 2); see CHECK lines above.
void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
  vst2q_lane_s16(a, b, 7);
}
16573
16574 // CHECK-LABEL: @test_vst2q_lane_s32(
16575 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
16576 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
16577 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
16578 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
16579 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16580 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
16581 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
16582 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16583 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16584 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16585 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
16586 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16587 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16588 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
16589 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16590 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16591 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16592 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16593 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16594 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
16595 // CHECK: ret void
// Verifies vst2q_lane_s32 with lane 3 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i32 (lane 3, alignment 4); see CHECK lines above.
void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
  vst2q_lane_s32(a, b, 3);
}
16599
16600 // CHECK-LABEL: @test_vst2q_lane_f16(
16601 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
16602 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
16603 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
16604 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
16605 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16606 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
16607 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
16608 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16609 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16610 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16611 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
16612 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
16613 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
16614 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
16615 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
16616 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
16617 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
16618 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
16619 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
16620 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 7, i32 2)
16621 // CHECK: ret void
// Verifies vst2q_lane_f16 with lane 7 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8f16 (lane 7, alignment 2); see CHECK lines above.
void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
  vst2q_lane_f16(a, b, 7);
}
16625
16626 // CHECK-LABEL: @test_vst2q_lane_f32(
16627 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
16628 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
16629 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
16630 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
16631 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16632 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
16633 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
16634 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16635 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16636 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16637 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
16638 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
16639 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
16640 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
16641 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
16642 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
16643 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
16644 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
16645 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
16646 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
16647 // CHECK: ret void
// Verifies vst2q_lane_f32 with lane 3 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4f32 (lane 3, alignment 4); see CHECK lines above.
void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
  vst2q_lane_f32(a, b, 3);
}
16651
16652 // CHECK-LABEL: @test_vst2q_lane_p16(
16653 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
16654 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
16655 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
16656 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
16657 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
16658 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
16659 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
16660 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
16661 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16662 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16663 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
16664 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16665 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16666 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
16667 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16668 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16669 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16670 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16671 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16672 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16673 // CHECK: ret void
// Verifies vst2q_lane_p16 with lane 7 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i16 (lane 7, alignment 2); see CHECK lines above.
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_lane_p16(a, b, 7);
}
16677
16678 // CHECK-LABEL: @test_vst2_lane_u8(
16679 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
16680 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
16681 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
16682 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16683 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16684 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
16685 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
16686 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16687 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16688 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16689 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16692 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16693 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16694 // CHECK: ret void
// Verifies vst2_lane_u8 with lane 7 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i8 (lane 7, alignment 1); see CHECK lines above.
void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_lane_u8(a, b, 7);
}
16698
16699 // CHECK-LABEL: @test_vst2_lane_u16(
16700 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
16701 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
16702 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
16703 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16704 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16705 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
16706 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
16707 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16708 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16709 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16710 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16711 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16712 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16713 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16714 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16715 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16716 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16717 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16718 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16719 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16720 // CHECK: ret void
// Verifies vst2_lane_u16 with lane 3 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i16 (lane 3, alignment 2); see CHECK lines above.
void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_lane_u16(a, b, 3);
}
16724
16725 // CHECK-LABEL: @test_vst2_lane_u32(
16726 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
16727 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
16728 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
16729 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16730 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16731 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
16732 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
16733 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16734 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16735 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16736 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16737 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16738 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16739 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16740 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16741 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16742 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16743 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16744 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16745 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
16746 // CHECK: ret void
// Verifies vst2_lane_u32 with lane 1 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v2i32 (lane 1, alignment 4); see CHECK lines above.
void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_lane_u32(a, b, 1);
}
16750
16751 // CHECK-LABEL: @test_vst2_lane_s8(
16752 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
16753 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
16754 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
16755 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16756 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16757 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
16758 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
16759 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16760 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16761 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16762 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16763 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16764 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16765 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16766 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16767 // CHECK: ret void
// Verifies vst2_lane_s8 with lane 7 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i8 (lane 7, alignment 1); see CHECK lines above.
void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
  vst2_lane_s8(a, b, 7);
}
16771
16772 // CHECK-LABEL: @test_vst2_lane_s16(
16773 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
16774 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
16775 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
16776 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16777 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16778 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
16779 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
16780 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16781 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16782 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16783 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16784 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16785 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16786 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16787 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16788 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16789 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16790 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16791 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16792 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16793 // CHECK: ret void
// Verifies vst2_lane_s16 with lane 3 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i16 (lane 3, alignment 2); see CHECK lines above.
void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
  vst2_lane_s16(a, b, 3);
}
16797
16798 // CHECK-LABEL: @test_vst2_lane_s32(
16799 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
16800 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
16801 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
16802 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16803 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16804 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
16805 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
16806 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16807 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16808 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16809 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16810 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16811 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16812 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16813 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16814 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16815 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16816 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16817 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16818 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
16819 // CHECK: ret void
// Verifies vst2_lane_s32 with lane 1 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v2i32 (lane 1, alignment 4); see CHECK lines above.
void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
  vst2_lane_s32(a, b, 1);
}
16823
16824 // CHECK-LABEL: @test_vst2_lane_f16(
16825 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
16826 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
16827 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
16828 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
16829 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16830 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
16831 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
16832 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16833 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
16834 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16835 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
16836 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16837 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
16838 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16839 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
16840 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16841 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
16842 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
16843 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
16844 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 3, i32 2)
16845 // CHECK: ret void
// Verifies vst2_lane_f16 with lane 3 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4f16 (lane 3, alignment 2); see CHECK lines above.
void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
  vst2_lane_f16(a, b, 3);
}
16849
16850 // CHECK-LABEL: @test_vst2_lane_f32(
16851 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
16852 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
16853 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
16854 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
16855 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16856 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
16857 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
16858 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16859 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
16860 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16861 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
16862 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16863 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
16864 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16865 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
16866 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16867 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
16868 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
16869 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
16870 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
16871 // CHECK: ret void
// Verifies vst2_lane_f32 with lane 1 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v2f32 (lane 1, alignment 4); see CHECK lines above.
void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
  vst2_lane_f32(a, b, 1);
}
16875
16876 // CHECK-LABEL: @test_vst2_lane_p8(
16877 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
16878 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
16879 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
16880 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16881 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16882 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
16883 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
16884 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16885 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16886 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16887 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16888 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16889 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16890 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16891 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16892 // CHECK: ret void
// Verifies vst2_lane_p8 with lane 7 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v8i8 (lane 7, alignment 1); see CHECK lines above.
void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_lane_p8(a, b, 7);
}
16896
16897 // CHECK-LABEL: @test_vst2_lane_p16(
16898 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
16899 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
16900 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
16901 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16902 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16903 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
16904 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
16905 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16906 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16907 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16908 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16909 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16910 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16911 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16912 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16913 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16914 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16915 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16916 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16917 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16918 // CHECK: ret void
// Verifies vst2_lane_p16 with lane 3 lowers to
// @llvm.arm.neon.vst2lane.p0i8.v4i16 (lane 3, alignment 2); see CHECK lines above.
void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_lane_p16(a, b, 3);
}
16922
16923 // CHECK-LABEL: @test_vst3q_u8(
16924 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
16925 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
16926 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
16927 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
16928 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
16929 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
16930 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
16931 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
16932 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16933 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
16934 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16935 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16936 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
16937 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16938 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16939 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
16940 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16941 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
16942 // CHECK: ret void
// Verifies vst3q_u8 lowers to @llvm.arm.neon.vst3.p0i8.v16i8 with
// alignment 1; see CHECK lines above.
void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}
16946
16947 // CHECK-LABEL: @test_vst3q_u16(
16948 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
16949 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
16950 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
16951 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
16952 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
16953 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
16954 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
16955 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
16956 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
16957 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
16958 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
16959 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16960 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16961 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
16962 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
16963 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16964 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16965 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
16966 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
16967 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
16968 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
16969 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16970 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16971 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
16972 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
16973 // CHECK: ret void
// Verifies vst3q_u16 lowers to llvm.arm.neon.vst3.p0i8.v8i16 with alignment 2,
// with each <8 x i16> element round-tripped through <16 x i8> bitcasts
// (see CHECK lines above).
test_vst3q_u16(uint16_t * a,uint16x8x3_t b)16974 void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
16975   vst3q_u16(a, b);
16976 }
16977
16978 // CHECK-LABEL: @test_vst3q_u32(
16979 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
16980 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
16981 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
16982 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
16983 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
16984 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
16985 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
16986 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
16987 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
16988 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
16989 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
16990 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16991 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16992 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
16993 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
16994 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16995 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16996 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
16997 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
16998 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
16999 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17000 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17001 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17002 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17003 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
17004 // CHECK: ret void
// Verifies vst3q_u32 lowers to llvm.arm.neon.vst3.p0i8.v4i32 with alignment 4,
// with each <4 x i32> element round-tripped through <16 x i8> bitcasts
// (see CHECK lines above).
test_vst3q_u32(uint32_t * a,uint32x4x3_t b)17005 void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
17006   vst3q_u32(a, b);
17007 }
17008
17009 // CHECK-LABEL: @test_vst3q_s8(
17010 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
17011 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
17012 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
17013 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
17014 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17015 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
17016 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
17017 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17018 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17019 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
17020 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17021 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17022 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
17023 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17024 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17025 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
17026 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17027 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
17028 // CHECK: ret void
// Verifies vst3q_s8 emits the same IR shape as the u8 variant: the intrinsics
// are sign-agnostic, so this also calls llvm.arm.neon.vst3.p0i8.v16i8 with
// alignment 1 and no element bitcasts (see CHECK lines above).
test_vst3q_s8(int8_t * a,int8x16x3_t b)17029 void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
17030   vst3q_s8(a, b);
17031 }
17032
17033 // CHECK-LABEL: @test_vst3q_s16(
17034 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
17035 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
17036 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
17037 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17038 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17039 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
17040 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
17041 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17042 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17043 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17044 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17045 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17046 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17047 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17048 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17049 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17050 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17051 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17052 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17053 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17054 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17055 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17056 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17057 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17058 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17059 // CHECK: ret void
// Verifies vst3q_s16 lowers to llvm.arm.neon.vst3.p0i8.v8i16 with alignment 2,
// identical in shape to the u16 variant aside from struct type names
// (see CHECK lines above).
test_vst3q_s16(int16_t * a,int16x8x3_t b)17060 void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
17061   vst3q_s16(a, b);
17062 }
17063
17064 // CHECK-LABEL: @test_vst3q_s32(
17065 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
17066 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
17067 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
17068 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17069 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17070 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
17071 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
17072 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17073 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17074 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17075 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17076 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17077 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17078 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17079 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17080 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17081 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17082 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17083 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17084 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17085 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17086 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17087 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17088 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17089 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
17090 // CHECK: ret void
// Verifies vst3q_s32 lowers to llvm.arm.neon.vst3.p0i8.v4i32 with alignment 4,
// identical in shape to the u32 variant aside from struct type names
// (see CHECK lines above).
test_vst3q_s32(int32_t * a,int32x4x3_t b)17091 void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
17092   vst3q_s32(a, b);
17093 }
17094
17095 // CHECK-LABEL: @test_vst3q_f16(
17096 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
17097 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
17098 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
17099 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
17100 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17101 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
17102 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
17103 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17104 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
17105 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17106 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
17107 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
17108 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
17109 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17110 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
17111 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
17112 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
17113 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17114 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
17115 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
17116 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
17117 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
17118 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
17119 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
17120 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 2)
17121 // CHECK: ret void
// Verifies vst3q_f16 lowers to llvm.arm.neon.vst3.p0i8.v8f16 with alignment 2,
// passing three <8 x half> elements through <16 x i8> bitcasts
// (see CHECK lines above; requires the +fullfp16 / half-argument RUN flags).
test_vst3q_f16(float16_t * a,float16x8x3_t b)17122 void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
17123   vst3q_f16(a, b);
17124 }
17125
17126 // CHECK-LABEL: @test_vst3q_f32(
17127 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
17128 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
17129 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
17130 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
17131 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17132 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
17133 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
17134 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17135 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
17136 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17137 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
17138 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
17139 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
17140 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17141 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
17142 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
17143 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
17144 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17145 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
17146 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
17147 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
17148 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
17149 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
17150 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
17151 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
17152 // CHECK: ret void
// Verifies vst3q_f32 lowers to llvm.arm.neon.vst3.p0i8.v4f32 with alignment 4,
// passing three <4 x float> elements through <16 x i8> bitcasts
// (see CHECK lines above).
test_vst3q_f32(float32_t * a,float32x4x3_t b)17153 void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
17154   vst3q_f32(a, b);
17155 }
17156
17157 // CHECK-LABEL: @test_vst3q_p8(
17158 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
17159 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
17160 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
17161 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
17162 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17163 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
17164 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
17165 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17166 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
17167 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
17168 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17169 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
17170 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
17171 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17172 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
17173 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
17174 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17175 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
17176 // CHECK: ret void
// Verifies vst3q_p8 shares the v16i8 lowering with the u8/s8 variants:
// llvm.arm.neon.vst3.p0i8.v16i8 with alignment 1, since poly8 is byte-sized
// (see CHECK lines above).
test_vst3q_p8(poly8_t * a,poly8x16x3_t b)17177 void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
17178   vst3q_p8(a, b);
17179 }
17180
17181 // CHECK-LABEL: @test_vst3q_p16(
17182 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
17183 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
17184 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
17185 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17186 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17187 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
17188 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
17189 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17190 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17191 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17192 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17193 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17194 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17195 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17196 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17197 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17198 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17199 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17200 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17201 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17202 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17203 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17204 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17205 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17206 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17207 // CHECK: ret void
// Verifies vst3q_p16 shares the v8i16 lowering with the u16/s16 variants:
// llvm.arm.neon.vst3.p0i8.v8i16 with alignment 2 (see CHECK lines above).
test_vst3q_p16(poly16_t * a,poly16x8x3_t b)17208 void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
17209   vst3q_p16(a, b);
17210 }
17211
17212 // CHECK-LABEL: @test_vst3_u8(
17213 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
17214 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
17215 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
17216 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17217 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17218 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
17219 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
17220 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17221 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17222 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17223 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17224 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17225 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17226 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17227 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17228 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17229 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17230 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
17231 // CHECK: ret void
// 64-bit (D-register) variant: vst3_u8 lowers to llvm.arm.neon.vst3.p0i8.v8i8
// with alignment 1; the uint8x8x3_t argument is coerced as [3 x i64] and copied
// with a 24-byte memcpy at 8-byte alignment (see CHECK lines above).
test_vst3_u8(uint8_t * a,uint8x8x3_t b)17232 void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
17233   vst3_u8(a, b);
17234 }
17235
17236 // CHECK-LABEL: @test_vst3_u16(
17237 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
17238 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
17239 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
17240 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17241 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17242 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
17243 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
17244 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17245 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17246 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17247 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17248 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17249 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17250 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17251 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17252 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17253 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17254 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17255 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17256 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17257 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17258 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17259 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17260 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17261 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
17262 // CHECK: ret void
// Verifies vst3_u16 lowers to llvm.arm.neon.vst3.p0i8.v4i16 with alignment 2,
// with each <4 x i16> element round-tripped through <8 x i8> bitcasts
// (see CHECK lines above).
test_vst3_u16(uint16_t * a,uint16x4x3_t b)17263 void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
17264   vst3_u16(a, b);
17265 }
17266
17267 // CHECK-LABEL: @test_vst3_u32(
17268 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
17269 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
17270 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
17271 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
17272 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17273 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
17274 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
17275 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17276 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17277 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17278 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
17279 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17280 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
17281 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17282 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
17283 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17284 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
17285 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17286 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
17287 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17288 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
17289 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
17290 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
17291 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
17292 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
17293 // CHECK: ret void
// Verifies vst3_u32 lowers to llvm.arm.neon.vst3.p0i8.v2i32 with alignment 4,
// with each <2 x i32> element round-tripped through <8 x i8> bitcasts
// (see CHECK lines above).
test_vst3_u32(uint32_t * a,uint32x2x3_t b)17294 void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
17295   vst3_u32(a, b);
17296 }
17297
17298 // CHECK-LABEL: @test_vst3_u64(
17299 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
17300 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
17301 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
17302 // CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
17303 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17304 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
17305 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
17306 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17307 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
17308 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
17309 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
17310 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
17311 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
17312 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
17313 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
17314 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
17315 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
17316 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
17317 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
17318 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
17319 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
17320 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
17321 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
17322 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
17323 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
17324 // CHECK: ret void
// Verifies vst3_u64 lowers to llvm.arm.neon.vst3.p0i8.v1i64 with alignment 4,
// using <1 x i64> elements bitcast through <8 x i8> (see CHECK lines above).
test_vst3_u64(uint64_t * a,uint64x1x3_t b)17325 void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
17326   vst3_u64(a, b);
17327 }
17328
17329 // CHECK-LABEL: @test_vst3_s8(
17330 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
17331 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
17332 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
17333 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17334 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17335 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
17336 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
17337 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17338 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17339 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17340 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17341 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17342 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17343 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17344 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17345 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17346 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17347 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
17348 // CHECK: ret void
// Verifies vst3_s8 emits the same IR shape as vst3_u8: the intrinsics are
// sign-agnostic, so this also calls llvm.arm.neon.vst3.p0i8.v8i8 with
// alignment 1 and no element bitcasts (see CHECK lines above).
test_vst3_s8(int8_t * a,int8x8x3_t b)17349 void test_vst3_s8(int8_t * a, int8x8x3_t b) {
17350   vst3_s8(a, b);
17351 }
17352
17353 // CHECK-LABEL: @test_vst3_s16(
17354 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
17355 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
17356 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
17357 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17358 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17359 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
17360 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
17361 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17362 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17363 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17364 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17365 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17366 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17367 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17368 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17369 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17370 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17371 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17372 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17373 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17374 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17375 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17376 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17377 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17378 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
17379 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3_s16:
// per the CHECK lines above, a call to @llvm.arm.neon.vst3.p0i8.v4i16.
void test_vst3_s16(int16_t * a, int16x4x3_t b) {
  vst3_s16(a, b);
}
17383
17384 // CHECK-LABEL: @test_vst3_s32(
17385 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
17386 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
17387 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
17388 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
17389 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17390 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
17391 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
17392 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17393 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17394 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17395 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
17396 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17397 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
17398 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17399 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
17400 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17401 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
17402 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17403 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
17404 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17405 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
17406 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
17407 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
17408 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
17409 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
17410 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3_s32:
// per the CHECK lines above, a call to @llvm.arm.neon.vst3.p0i8.v2i32.
void test_vst3_s32(int32_t * a, int32x2x3_t b) {
  vst3_s32(a, b);
}
17414
17415 // CHECK-LABEL: @test_vst3_s64(
17416 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
17417 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
17418 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
17419 // CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
17420 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17421 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
17422 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
17423 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17424 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
17425 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
17426 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
17427 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
17428 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
17429 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
17430 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
17431 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
17432 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
17433 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
17434 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
17435 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
17436 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
17437 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
17438 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
17439 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
17440 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
17441 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3_s64:
// per the CHECK lines above, a call to @llvm.arm.neon.vst3.p0i8.v1i64.
void test_vst3_s64(int64_t * a, int64x1x3_t b) {
  vst3_s64(a, b);
}
17445
17446 // CHECK-LABEL: @test_vst3_f16(
17447 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
17448 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
17449 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
17450 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
17451 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17452 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
17453 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
17454 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17455 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
17456 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
17457 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
17458 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
17459 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
17460 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
17461 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
17462 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
17463 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
17464 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
17465 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
17466 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
17467 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
17468 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
17469 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
17470 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
17471 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 2)
17472 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3_f16:
// per the CHECK lines above, a call to @llvm.arm.neon.vst3.p0i8.v4f16.
void test_vst3_f16(float16_t * a, float16x4x3_t b) {
  vst3_f16(a, b);
}
17476
17477 // CHECK-LABEL: @test_vst3_f32(
17478 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
17479 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
17480 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
17481 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
17482 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17483 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
17484 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
17485 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17486 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
17487 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
17488 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
17489 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
17490 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
17491 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
17492 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
17493 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
17494 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
17495 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
17496 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
17497 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
17498 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
17499 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
17500 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
17501 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
17502 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
17503 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3_f32:
// per the CHECK lines above, a call to @llvm.arm.neon.vst3.p0i8.v2f32.
void test_vst3_f32(float32_t * a, float32x2x3_t b) {
  vst3_f32(a, b);
}
17507
17508 // CHECK-LABEL: @test_vst3_p8(
17509 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
17510 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
17511 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
17512 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17513 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17514 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
17515 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
17516 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17517 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
17518 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17519 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17520 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
17521 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17522 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17523 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
17524 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17525 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17526 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
17527 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3_p8:
// per the CHECK lines above, a call to @llvm.arm.neon.vst3.p0i8.v8i8
// (same lowering as the s8 variant).
void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
17531
17532 // CHECK-LABEL: @test_vst3_p16(
17533 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
17534 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
17535 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
17536 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17537 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17538 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
17539 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
17540 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17541 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17542 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
17543 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17544 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17545 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17546 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
17547 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17548 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17549 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17550 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
17551 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17552 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17553 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17554 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17555 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17556 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17557 // CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
17558 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3_p16:
// per the CHECK lines above, a call to @llvm.arm.neon.vst3.p0i8.v4i16
// (same lowering as the s16 variant).
void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
17562
17563 // CHECK-LABEL: @test_vst3q_lane_u16(
17564 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
17565 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
17566 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
17567 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17568 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17569 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
17570 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
17571 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17572 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17573 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17574 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17575 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17576 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17577 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17578 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17579 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17580 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17581 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17582 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17583 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17584 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17585 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17586 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17587 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17588 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
17589 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3q_lane_u16
// with lane index 7: per the CHECK lines above, a call to
// @llvm.arm.neon.vst3lane.p0i8.v8i16 with i32 7 as the lane operand.
void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_lane_u16(a, b, 7);
}
17593
17594 // CHECK-LABEL: @test_vst3q_lane_u32(
17595 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
17596 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
17597 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
17598 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17599 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17600 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
17601 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
17602 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17603 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17604 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17605 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17606 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17607 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17608 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17609 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17610 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17611 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17612 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17613 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17614 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17615 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17616 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17617 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17618 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17619 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
17620 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3q_lane_u32
// with lane index 3: per the CHECK lines above, a call to
// @llvm.arm.neon.vst3lane.p0i8.v4i32 with i32 3 as the lane operand.
void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_lane_u32(a, b, 3);
}
17624
17625 // CHECK-LABEL: @test_vst3q_lane_s16(
17626 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
17627 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
17628 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
17629 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17630 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17631 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
17632 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
17633 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17634 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17635 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17636 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17637 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17638 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17639 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17640 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17641 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17642 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17643 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17644 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17645 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17646 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17647 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17648 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17649 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17650 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
17651 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3q_lane_s16
// with lane index 7: per the CHECK lines above, a call to
// @llvm.arm.neon.vst3lane.p0i8.v8i16 (same lowering as the u16 variant).
void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
  vst3q_lane_s16(a, b, 7);
}
17655
17656 // CHECK-LABEL: @test_vst3q_lane_s32(
17657 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
17658 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
17659 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
17660 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17661 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17662 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
17663 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
17664 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17665 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17666 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17667 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17668 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17669 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17670 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17671 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17672 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17673 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17674 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17675 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17676 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17677 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17678 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17679 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17680 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17681 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
17682 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3q_lane_s32
// with lane index 3: per the CHECK lines above, a call to
// @llvm.arm.neon.vst3lane.p0i8.v4i32 (same lowering as the u32 variant).
void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
  vst3q_lane_s32(a, b, 3);
}
17686
17687 // CHECK-LABEL: @test_vst3q_lane_f16(
17688 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
17689 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
17690 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
17691 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
17692 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17693 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
17694 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
17695 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17696 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
17697 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17698 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
17699 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
17700 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
17701 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17702 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
17703 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
17704 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
17705 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17706 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
17707 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
17708 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
17709 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
17710 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
17711 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
17712 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 7, i32 2)
17713 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3q_lane_f16
// with lane index 7: per the CHECK lines above, a call to
// @llvm.arm.neon.vst3lane.p0i8.v8f16 with i32 7 as the lane operand.
void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
  vst3q_lane_f16(a, b, 7);
}
17717
17718 // CHECK-LABEL: @test_vst3q_lane_f32(
17719 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
17720 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
17721 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
17722 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
17723 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17724 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
17725 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
17726 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17727 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
17728 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17729 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
17730 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
17731 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
17732 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17733 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
17734 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
17735 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
17736 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17737 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
17738 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
17739 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
17740 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
17741 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
17742 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
17743 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4)
17744 // CHECK: ret void
// Wrapper letting FileCheck verify the IR clang emits for vst3q_lane_f32
// with lane index 3: per the CHECK lines above, a call to
// @llvm.arm.neon.vst3lane.p0i8.v4f32 with i32 3 as the lane operand.
void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) {
  vst3q_lane_f32(a, b, 3);
}
17748
17749 // CHECK-LABEL: @test_vst3q_lane_p16(
17750 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
17751 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
17752 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
17753 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17754 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17755 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
17756 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
17757 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17758 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17759 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17760 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17761 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17762 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17763 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17764 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17765 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17766 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17767 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17768 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17769 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17770 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17771 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17772 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17773 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17774 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
17775 // CHECK: ret void
// Stores lane 7 (the highest valid lane of an <8 x i16> vector) from each of
// the three vectors in b; the CHECK lines above verify lowering to
// @llvm.arm.neon.vst3lane.p0i8.v8i16 with 2-byte alignment.
void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_lane_p16(a, b, 7);
}
17779
17780 // CHECK-LABEL: @test_vst3_lane_u8(
17781 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
17782 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
17783 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
17784 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17785 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17786 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
17787 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
17788 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17789 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17790 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17791 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17792 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17793 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17794 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17795 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17796 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17797 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17798 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
17799 // CHECK: ret void
// Stores lane 7 (the highest valid lane of an <8 x i8> vector) from each of
// the three vectors in b; the CHECK lines above verify lowering to
// @llvm.arm.neon.vst3lane.p0i8.v8i8 with 1-byte alignment and no pointer
// bitcast (the destination is already i8*).
void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_lane_u8(a, b, 7);
}
17803
17804 // CHECK-LABEL: @test_vst3_lane_u16(
17805 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
17806 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
17807 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
17808 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17809 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17810 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
17811 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
17812 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17813 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17814 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17815 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17816 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17817 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17818 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17819 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17820 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17821 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17822 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17823 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17824 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17825 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17826 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17827 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17828 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17829 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
17830 // CHECK: ret void
// Stores lane 3 (the highest valid lane of a <4 x i16> vector) from each of
// the three vectors in b; the CHECK lines above verify lowering to
// @llvm.arm.neon.vst3lane.p0i8.v4i16 with 2-byte alignment.
void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_lane_u16(a, b, 3);
}
17834
17835 // CHECK-LABEL: @test_vst3_lane_u32(
17836 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
17837 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
17838 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
17839 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
17840 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17841 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
17842 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
17843 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17844 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17845 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17846 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
17847 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17848 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
17849 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17850 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
17851 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17852 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
17853 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
17854 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
17855 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17856 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
17857 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
17858 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
17859 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
17860 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
17861 // CHECK: ret void
// Stores lane 1 (the highest valid lane of a <2 x i32> vector) from each of
// the three vectors in b; the CHECK lines above verify lowering to
// @llvm.arm.neon.vst3lane.p0i8.v2i32 with 4-byte alignment.
void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_lane_u32(a, b, 1);
}
17865
17866 // CHECK-LABEL: @test_vst3_lane_s8(
17867 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
17868 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
17869 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
17870 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17871 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17872 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
17873 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
17874 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17875 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17876 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17877 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17878 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17879 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17880 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17881 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
17882 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17883 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17884 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
17885 // CHECK: ret void
// Signed-i8 variant: identical lowering to the u8 case (lane 7,
// @llvm.arm.neon.vst3lane.p0i8.v8i8, align 1), since the store intrinsic is
// sign-agnostic.
void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) {
  vst3_lane_s8(a, b, 7);
}
17889
17890 // CHECK-LABEL: @test_vst3_lane_s16(
17891 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
17892 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
17893 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
17894 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17895 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17896 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
17897 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
17898 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17899 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
17900 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17901 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17902 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17903 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17904 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17905 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17906 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17907 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17908 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
17909 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17910 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17911 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17912 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17913 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17914 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17915 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
17916 // CHECK: ret void
// Signed-i16 variant: identical lowering to the u16 case (lane 3,
// @llvm.arm.neon.vst3lane.p0i8.v4i16, align 2), since the store intrinsic is
// sign-agnostic.
void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) {
  vst3_lane_s16(a, b, 3);
}
17920
17921 // CHECK-LABEL: @test_vst3_lane_s32(
17922 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
17923 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
17924 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
17925 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
17926 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17927 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
17928 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
17929 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17930 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
17931 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17932 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
17933 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
17934 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
17935 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17936 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
17937 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
17938 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
17939 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
17940 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
17941 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
17942 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
17943 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
17944 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
17945 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
17946 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
17947 // CHECK: ret void
// Signed-i32 variant: identical lowering to the u32 case (lane 1,
// @llvm.arm.neon.vst3lane.p0i8.v2i32, align 4), since the store intrinsic is
// sign-agnostic.
void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
  vst3_lane_s32(a, b, 1);
}
17951
17952 // CHECK-LABEL: @test_vst3_lane_f16(
17953 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
17954 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
17955 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
17956 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
17957 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17958 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
17959 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
17960 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17961 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
17962 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
17963 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
17964 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
17965 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
17966 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
17967 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
17968 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
17969 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
17970 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
17971 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
17972 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
17973 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
17974 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
17975 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
17976 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
17977 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 3, i32 2)
17978 // CHECK: ret void
// Stores lane 3 (the highest valid lane of a <4 x half> vector) from each of
// the three vectors in b; the CHECK lines above verify lowering to
// @llvm.arm.neon.vst3lane.p0i8.v4f16 with 2-byte alignment (requires the
// +fullfp16 target feature from the RUN line).
void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
  vst3_lane_f16(a, b, 3);
}
17982
17983 // CHECK-LABEL: @test_vst3_lane_f32(
17984 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
17985 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
17986 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
17987 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
17988 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17989 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
17990 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
17991 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17992 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
17993 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
17994 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
17995 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
17996 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
17997 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
17998 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
17999 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
18000 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
18001 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
18002 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
18003 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
18004 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
18005 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
18006 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
18007 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
18008 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
18009 // CHECK: ret void
// Stores lane 1 (the highest valid lane of a <2 x float> vector) from each of
// the three vectors in b; the CHECK lines above verify lowering to
// @llvm.arm.neon.vst3lane.p0i8.v2f32 with 4-byte alignment.
void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
  vst3_lane_f32(a, b, 1);
}
18013
18014 // CHECK-LABEL: @test_vst3_lane_p8(
18015 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
18016 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
18017 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
18018 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
18019 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
18020 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
18021 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
18022 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
18023 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
18024 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
18025 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18026 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
18027 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18028 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18029 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
18030 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18031 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18032 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
18033 // CHECK: ret void
// Polynomial-i8 variant: identical lowering to the u8/s8 cases (lane 7,
// @llvm.arm.neon.vst3lane.p0i8.v8i8, align 1).
void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_lane_p8(a, b, 7);
}
18037
18038 // CHECK-LABEL: @test_vst3_lane_p16(
18039 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
18040 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
18041 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
18042 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
18043 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
18044 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
18045 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
18046 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
18047 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18048 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
18049 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
18050 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18051 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18052 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
18053 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18054 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18055 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18056 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
18057 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18058 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18059 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18060 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18061 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18062 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18063 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
18064 // CHECK: ret void
// Polynomial-i16 variant: identical lowering to the u16/s16 cases (lane 3,
// @llvm.arm.neon.vst3lane.p0i8.v4i16, align 2).
void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_lane_p16(a, b, 3);
}
18068
18069 // CHECK-LABEL: @test_vst4q_u8(
18070 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
18071 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
18072 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
18073 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
18074 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18075 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
18076 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
18077 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18078 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18079 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
18080 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
18081 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18082 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
18083 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
18084 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18085 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
18086 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
18087 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
18088 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
18089 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
18090 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
18091 // CHECK: ret void
// Whole-vector (non-lane) 4-element interleaved store: the CHECK lines above
// verify that all four <16 x i8> vectors of b are loaded from the coerced
// struct and passed to @llvm.arm.neon.vst4.p0i8.v16i8 with 1-byte alignment.
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
18095
18096 // CHECK-LABEL: @test_vst4q_u16(
18097 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
18098 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
18099 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
18100 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18101 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18102 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
18103 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
18104 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18105 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18106 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18107 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18108 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18109 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18110 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18111 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18112 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18113 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18114 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18115 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18116 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18117 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18118 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
18119 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18120 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18121 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18122 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18123 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18124 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18125 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18126 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
18127 // CHECK: ret void
// Whole-vector 4-element interleaved store of <8 x i16> vectors: the CHECK
// lines above verify lowering to @llvm.arm.neon.vst4.p0i8.v8i16 with 2-byte
// alignment, including the round-trip bitcasts through <16 x i8>.
void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}
18131
18132 // CHECK-LABEL: @test_vst4q_u32(
18133 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
18134 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
18135 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
18136 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
18137 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18138 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
18139 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
18140 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18141 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18142 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18143 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
18144 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
18145 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
18146 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18147 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
18148 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
18149 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
18150 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18151 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
18152 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
18153 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
18154 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
18155 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
18156 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
18157 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
18158 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
18159 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
18160 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
18161 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
18162 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
18163 // CHECK: ret void
test_vst4q_u32(uint32_t * a,uint32x4x4_t b)18164 void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
18165 vst4q_u32(a, b);
18166 }
18167
18168 // CHECK-LABEL: @test_vst4q_s8(
18169 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
18170 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
18171 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
18172 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
18173 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18174 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
18175 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
18176 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18177 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18178 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
18179 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
18180 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18181 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
18182 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
18183 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18184 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
18185 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
18186 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
18187 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
18188 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
18189 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
18190 // CHECK: ret void
test_vst4q_s8(int8_t * a,int8x16x4_t b)18191 void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
18192 vst4q_s8(a, b);
18193 }
18194
18195 // CHECK-LABEL: @test_vst4q_s16(
18196 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
18197 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
18198 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
18199 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18200 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18201 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
18202 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
18203 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18204 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18205 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18206 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18207 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18208 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18209 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18210 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18211 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18212 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18213 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18214 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18215 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18216 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18217 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
18218 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18219 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18220 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18221 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18222 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18223 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18224 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18225 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
18226 // CHECK: ret void
test_vst4q_s16(int16_t * a,int16x8x4_t b)18227 void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
18228 vst4q_s16(a, b);
18229 }
18230
18231 // CHECK-LABEL: @test_vst4q_s32(
18232 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
18233 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
18234 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
18235 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
18236 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18237 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
18238 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
18239 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18240 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18241 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18242 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
18243 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
18244 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
18245 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18246 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
18247 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
18248 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
18249 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18250 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
18251 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
18252 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
18253 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18254 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
18255 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
18256 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
18257 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
18258 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
18259 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
18260 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
18261 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
18262 // CHECK: ret void
test_vst4q_s32(int32_t * a,int32x4x4_t b)18263 void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
18264 vst4q_s32(a, b);
18265 }
18266
18267 // CHECK-LABEL: @test_vst4q_f16(
18268 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
18269 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
18270 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
18271 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
18272 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18273 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
18274 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
18275 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18276 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
18277 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18278 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
18279 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
18280 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
18281 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18282 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
18283 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
18284 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
18285 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18286 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
18287 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
18288 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
18289 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18290 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
18291 // CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
18292 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
18293 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
18294 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
18295 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
18296 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
18297 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 2)
18298 // CHECK: ret void
test_vst4q_f16(float16_t * a,float16x8x4_t b)18299 void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
18300 vst4q_f16(a, b);
18301 }
18302
18303 // CHECK-LABEL: @test_vst4q_f32(
18304 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
18305 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
18306 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
18307 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
18308 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18309 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
18310 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
18311 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18312 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
18313 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18314 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
18315 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
18316 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
18317 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18318 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
18319 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
18320 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
18321 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18322 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
18323 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
18324 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
18325 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18326 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
18327 // CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
18328 // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
18329 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
18330 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
18331 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
18332 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
18333 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
18334 // CHECK: ret void
test_vst4q_f32(float32_t * a,float32x4x4_t b)18335 void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
18336 vst4q_f32(a, b);
18337 }
18338
18339 // CHECK-LABEL: @test_vst4q_p8(
18340 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
18341 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
18342 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
18343 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
18344 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18345 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
18346 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
18347 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18348 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18349 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
18350 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
18351 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18352 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
18353 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
18354 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18355 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
18356 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
18357 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
18358 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
18359 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
18360 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
18361 // CHECK: ret void
test_vst4q_p8(poly8_t * a,poly8x16x4_t b)18362 void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
18363 vst4q_p8(a, b);
18364 }
18365
18366 // CHECK-LABEL: @test_vst4q_p16(
18367 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
18368 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
18369 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
18370 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
18371 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18372 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
18373 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
18374 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18375 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18376 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18377 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
18378 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
18379 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18380 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18381 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
18382 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
18383 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18384 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18385 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
18386 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
18387 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18388 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
18389 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
18390 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
18391 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18392 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18393 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18394 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18395 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18396 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
18397 // CHECK: ret void
test_vst4q_p16(poly16_t * a,poly16x8x4_t b)18398 void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
18399 vst4q_p16(a, b);
18400 }
18401
18402 // CHECK-LABEL: @test_vst4_u8(
18403 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
18404 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
18405 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
18406 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
18407 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18408 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
18409 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
18410 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18411 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18412 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
18413 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18414 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18415 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18416 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18417 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18418 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18419 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18420 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
18421 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
18422 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
18423 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
18424 // CHECK: ret void
test_vst4_u8(uint8_t * a,uint8x8x4_t b)18425 void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
18426 vst4_u8(a, b);
18427 }
18428
18429 // CHECK-LABEL: @test_vst4_u16(
18430 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
18431 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
18432 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
18433 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
18434 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18435 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
18436 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
18437 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18438 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
18439 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18440 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
18441 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18442 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18443 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18444 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18445 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18446 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18447 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18448 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18449 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18450 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18451 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
18452 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
18453 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
18454 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18455 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18456 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18457 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18458 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18459 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
18460 // CHECK: ret void
test_vst4_u16(uint16_t * a,uint16x4x4_t b)18461 void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
18462 vst4_u16(a, b);
18463 }
18464
18465 // CHECK-LABEL: @test_vst4_u32(
18466 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
18467 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
18468 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
18469 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
18470 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18471 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
18472 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
18473 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18474 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18475 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18476 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
18477 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
18478 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
18479 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18480 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
18481 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
18482 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
18483 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18484 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
18485 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
18486 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
18487 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
18488 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
18489 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
18490 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
18491 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
18492 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
18493 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
18494 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
18495 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
18496 // CHECK: ret void
test_vst4_u32(uint32_t * a,uint32x2x4_t b)18497 void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
18498 vst4_u32(a, b);
18499 }
18500
18501 // CHECK-LABEL: @test_vst4_u64(
18502 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
18503 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
18504 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
18505 // CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
18506 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18507 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
18508 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
18509 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18510 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
18511 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18512 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
18513 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
18514 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
18515 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18516 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
18517 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
18518 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
18519 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18520 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
18521 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
18522 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
18523 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18524 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
18525 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
18526 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
18527 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
18528 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
18529 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
18530 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
18531 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
18532 // CHECK: ret void
// Exercises vst4_u64; the CHECK block above verifies it lowers to
// @llvm.arm.neon.vst4.p0i8.v1i64 with alignment 4.
void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
  vst4_u64(a, b);
}
18536
// vst4_s8: the 4-vector aggregate is passed coerced as [4 x i64], spilled to
// an alloca and memcpy'd; each <8 x i8> element is loaded and the store
// lowers to @llvm.arm.neon.vst4.p0i8.v8i8 with alignment 1 (i8 element).
// CHECK-LABEL: @test_vst4_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_s8(int8_t * a, int8x8x4_t b) {
  vst4_s8(a, b);
}
18563
// vst4_s16: like the s8 case but each <4 x i16> lane is round-tripped
// through <8 x i8> bitcasts; lowers to @llvm.arm.neon.vst4.p0i8.v4i16
// with alignment 2 (i16 element).
// CHECK-LABEL: @test_vst4_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_s16(int16_t * a, int16x4x4_t b) {
  vst4_s16(a, b);
}
18599
// vst4_s32: <2 x i32> lanes, lowered to @llvm.arm.neon.vst4.p0i8.v2i32
// with alignment 4 (i32 element).
// CHECK-LABEL: @test_vst4_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_s32(int32_t * a, int32x2x4_t b) {
  vst4_s32(a, b);
}
18635
// vst4_s64: <1 x i64> lanes, lowered to @llvm.arm.neon.vst4.p0i8.v1i64
// with alignment 4 (same shape as the unsigned u64 variant above).
// CHECK-LABEL: @test_vst4_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_s64(int64_t * a, int64x1x4_t b) {
  vst4_s64(a, b);
}
18671
// vst4_f16: <4 x half> lanes, lowered to @llvm.arm.neon.vst4.p0i8.v4f16
// with alignment 2 (half element).
// CHECK-LABEL: @test_vst4_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_f16(float16_t * a, float16x4x4_t b) {
  vst4_f16(a, b);
}
18707
// vst4_f32: <2 x float> lanes, lowered to @llvm.arm.neon.vst4.p0i8.v2f32
// with alignment 4 (float element).
// CHECK-LABEL: @test_vst4_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_f32(float32_t * a, float32x2x4_t b) {
  vst4_f32(a, b);
}
18743
// vst4_p8: polynomial i8 variant; identical lowering to the s8 case
// (@llvm.arm.neon.vst4.p0i8.v8i8, alignment 1) — no per-lane bitcasts
// are needed since the element type is already i8.
// CHECK-LABEL: @test_vst4_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_p8(a, b);
}
18770
// vst4_p16: polynomial i16 variant; same lowering as s16
// (@llvm.arm.neon.vst4.p0i8.v4i16, alignment 2).
// CHECK-LABEL: @test_vst4_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_p16(a, b);
}
18806
// vst4q_lane_u16: 128-bit (q) lane-store variant; the [8 x i64] coercion and
// 16-byte alignment reflect the quad registers, and the lane immediate 7 is
// forwarded to @llvm.arm.neon.vst4lane.p0i8.v8i16 ahead of alignment 2.
// CHECK-LABEL: @test_vst4q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_lane_u16(a, b, 7);
}
18842
// vst4q_lane_u32: quad-register lane store; lane immediate 3 is forwarded
// to @llvm.arm.neon.vst4lane.p0i8.v4i32 ahead of alignment 4.
// CHECK-LABEL: @test_vst4q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_lane_u32(a, b, 3);
}
18878
// vst4q_lane_s16: signed counterpart of the u16 lane test; identical IR
// shape (@llvm.arm.neon.vst4lane.p0i8.v8i16, lane 7, alignment 2).
// CHECK-LABEL: @test_vst4q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
  vst4q_lane_s16(a, b, 7);
}
18914
18915 // CHECK-LABEL: @test_vst4q_lane_s32(
18916 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
18917 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
18918 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
18919 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
18920 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18921 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
18922 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
18923 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18924 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
18925 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18926 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
18927 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
18928 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
18929 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18930 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
18931 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
18932 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
18933 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18934 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
18935 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
18936 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
18937 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
18938 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
18939 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
18940 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
18941 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
18942 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
18943 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
18944 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
18945 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
18946 // CHECK: ret void
test_vst4q_lane_s32(int32_t * a,int32x4x4_t b)18947 void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
18948 vst4q_lane_s32(a, b, 3);
18949 }
18950
18951 // CHECK-LABEL: @test_vst4q_lane_f16(
18952 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
18953 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
18954 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
18955 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
18956 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18957 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
18958 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
18959 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18960 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
18961 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18962 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
18963 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
18964 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
18965 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18966 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
18967 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
18968 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
18969 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18970 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
18971 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
18972 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
18973 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
18974 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
18975 // CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
18976 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
18977 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
18978 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
18979 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
18980 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
18981 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 7, i32 2)
18982 // CHECK: ret void
test_vst4q_lane_f16(float16_t * a,float16x8x4_t b)18983 void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
18984 vst4q_lane_f16(a, b, 7);
18985 }
18986
18987 // CHECK-LABEL: @test_vst4q_lane_f32(
18988 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
18989 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
18990 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
18991 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
18992 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
18993 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
18994 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
18995 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
18996 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
18997 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
18998 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
18999 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
19000 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
19001 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
19002 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
19003 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
19004 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
19005 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
19006 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
19007 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
19008 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
19009 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
19010 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
19011 // CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
19012 // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
19013 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
19014 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
19015 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
19016 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
19017 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
19018 // CHECK: ret void
test_vst4q_lane_f32(float32_t * a,float32x4x4_t b)19019 void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
19020 vst4q_lane_f32(a, b, 3);
19021 }
19022
19023 // CHECK-LABEL: @test_vst4q_lane_p16(
19024 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
19025 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
19026 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
19027 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
19028 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
19029 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
19030 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
19031 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
19032 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19033 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
19034 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
19035 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
19036 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
19037 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
19038 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
19039 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
19040 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
19041 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
19042 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
19043 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
19044 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
19045 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
19046 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
19047 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
19048 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
19049 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
19050 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
19051 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
19052 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
19053 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
19054 // CHECK: ret void
test_vst4q_lane_p16(poly16_t * a,poly16x8x4_t b)19055 void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
19056 vst4q_lane_p16(a, b, 7);
19057 }
19058
19059 // CHECK-LABEL: @test_vst4_lane_u8(
19060 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
19061 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
19062 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
19063 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19064 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19065 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
19066 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
19067 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19068 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19069 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
19070 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19071 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19072 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19073 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19074 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19075 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19076 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19077 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19078 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
19079 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
19080 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
19081 // CHECK: ret void
test_vst4_lane_u8(uint8_t * a,uint8x8x4_t b)19082 void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
19083 vst4_lane_u8(a, b, 7);
19084 }
19085
19086 // CHECK-LABEL: @test_vst4_lane_u16(
19087 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
19088 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
19089 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
19090 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
19091 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19092 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
19093 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
19094 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19095 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19096 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19097 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
19098 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19099 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19100 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19101 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19102 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19103 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19104 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19105 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19106 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19107 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19108 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19109 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
19110 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
19111 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
19112 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19113 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19114 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19115 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
19116 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
19117 // CHECK: ret void
test_vst4_lane_u16(uint16_t * a,uint16x4x4_t b)19118 void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
19119 vst4_lane_u16(a, b, 3);
19120 }
19121
19122 // CHECK-LABEL: @test_vst4_lane_u32(
19123 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
19124 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
19125 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
19126 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
19127 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19128 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
19129 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
19130 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19131 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19132 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19133 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
19134 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19135 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19136 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19137 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19138 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19139 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19140 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19141 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
19142 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
19143 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
19144 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19145 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
19146 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
19147 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
19148 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19149 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19150 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
19151 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
19152 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
19153 // CHECK: ret void
test_vst4_lane_u32(uint32_t * a,uint32x2x4_t b)19154 void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
19155 vst4_lane_u32(a, b, 1);
19156 }
19157
19158 // CHECK-LABEL: @test_vst4_lane_s8(
19159 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
19160 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
19161 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
19162 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19163 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19164 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
19165 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
19166 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19167 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19168 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
19169 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19170 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19171 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19172 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19173 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19174 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19175 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19176 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19177 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
19178 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
19179 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
19180 // CHECK: ret void
test_vst4_lane_s8(int8_t * a,int8x8x4_t b)19181 void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
19182 vst4_lane_s8(a, b, 7);
19183 }
19184
19185 // CHECK-LABEL: @test_vst4_lane_s16(
19186 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
19187 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
19188 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
19189 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
19190 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19191 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
19192 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
19193 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19194 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19195 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
19196 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
19197 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19198 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19199 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
19200 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19201 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19202 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19203 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
19204 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19205 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19206 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19207 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
19208 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
19209 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
19210 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
19211 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19212 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19213 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19214 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
19215 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
19216 // CHECK: ret void
test_vst4_lane_s16(int16_t * a,int16x4x4_t b)19217 void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) {
19218 vst4_lane_s16(a, b, 3);
19219 }
19220
19221 // CHECK-LABEL: @test_vst4_lane_s32(
19222 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
19223 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
19224 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
19225 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
19226 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19227 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
19228 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
19229 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19230 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
19231 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
19232 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
19233 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19234 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19235 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
19236 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19237 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19238 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19239 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
19240 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
19241 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
19242 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
19243 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
19244 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
19245 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
19246 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
19247 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19248 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19249 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
19250 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
19251 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
19252 // CHECK: ret void
test_vst4_lane_s32(int32_t * a,int32x2x4_t b)19253 void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
19254 vst4_lane_s32(a, b, 1);
19255 }
19256
19257 // CHECK-LABEL: @test_vst4_lane_f16(
19258 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
19259 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
19260 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
19261 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
19262 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19263 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
19264 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
19265 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19266 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
19267 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19268 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
19269 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
19270 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
19271 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19272 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
19273 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
19274 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
19275 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19276 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
19277 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
19278 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
19279 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
19280 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
19281 // CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
19282 // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
19283 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
19284 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
19285 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
19286 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
19287 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 3, i32 2)
19288 // CHECK: ret void
test_vst4_lane_f16(float16_t * a,float16x4x4_t b)19289 void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
19290 vst4_lane_f16(a, b, 3);
19291 }
19292
19293 // CHECK-LABEL: @test_vst4_lane_f32(
19294 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
19295 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
19296 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
19297 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
19298 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19299 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
19300 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
19301 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19302 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
19303 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19304 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
19305 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
19306 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
19307 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19308 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
19309 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
19310 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
19311 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19312 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
19313 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
19314 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
19315 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
19316 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
19317 // CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
19318 // CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
19319 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
19320 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
19321 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
19322 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
19323 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4)
19324 // CHECK: ret void
// Verifies vst4_lane_f32 lowers to llvm.arm.neon.vst4lane.p0i8.v2f32 with lane 1, align 4 (expectations above).
test_vst4_lane_f32(float32_t * a,float32x2x4_t b)19325 void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) {
19326   vst4_lane_f32(a, b, 1);
19327 }
19328
19329 // CHECK-LABEL: @test_vst4_lane_p8(
19330 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
19331 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
19332 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
19333 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19334 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19335 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
19336 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
19337 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19338 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19339 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
19340 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19341 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19342 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19343 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19344 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19345 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19346 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19347 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
19348 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
19349 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
19350 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
19351 // CHECK: ret void
// Verifies vst4_lane_p8 lowers to llvm.arm.neon.vst4lane.p0i8.v8i8 with lane 7, align 1 (expectations above).
test_vst4_lane_p8(poly8_t * a,poly8x8x4_t b)19352 void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) {
19353   vst4_lane_p8(a, b, 7);
19354 }
19355
19356 // CHECK-LABEL: @test_vst4_lane_p16(
19357 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
19358 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
19359 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
19360 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
19361 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19362 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
19363 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
19364 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19365 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
19366 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19367 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
19368 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19369 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19370 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19371 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19372 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19373 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19374 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19375 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19376 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19377 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19378 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
19379 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
19380 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
19381 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
19382 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19383 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19384 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19385 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
19386 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
19387 // CHECK: ret void
// Verifies vst4_lane_p16 lowers to llvm.arm.neon.vst4lane.p0i8.v4i16 with lane 3, align 2 (expectations above).
test_vst4_lane_p16(poly16_t * a,poly16x4x4_t b)19388 void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) {
19389   vst4_lane_p16(a, b, 3);
19390 }
19391
19392 // CHECK-LABEL: @test_vsub_s8(
19393 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
19394 // CHECK: ret <8 x i8> [[SUB_I]]
// Verifies vsub_s8 lowers to a plain IR 'sub <8 x i8>' (expectations above).
test_vsub_s8(int8x8_t a,int8x8_t b)19395 int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) {
19396   return vsub_s8(a, b);
19397 }
19398
19399 // CHECK-LABEL: @test_vsub_s16(
19400 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
19401 // CHECK: ret <4 x i16> [[SUB_I]]
// Verifies vsub_s16 lowers to a plain IR 'sub <4 x i16>' (expectations above).
test_vsub_s16(int16x4_t a,int16x4_t b)19402 int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) {
19403   return vsub_s16(a, b);
19404 }
19405
19406 // CHECK-LABEL: @test_vsub_s32(
19407 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
19408 // CHECK: ret <2 x i32> [[SUB_I]]
// Verifies vsub_s32 lowers to a plain IR 'sub <2 x i32>' (expectations above).
test_vsub_s32(int32x2_t a,int32x2_t b)19409 int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) {
19410   return vsub_s32(a, b);
19411 }
19412
19413 // CHECK-LABEL: @test_vsub_s64(
19414 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
19415 // CHECK: ret <1 x i64> [[SUB_I]]
// Verifies vsub_s64 lowers to a plain IR 'sub <1 x i64>' (expectations above).
test_vsub_s64(int64x1_t a,int64x1_t b)19416 int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) {
19417   return vsub_s64(a, b);
19418 }
19419
19420 // CHECK-LABEL: @test_vsub_f32(
19421 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b
19422 // CHECK: ret <2 x float> [[SUB_I]]
// Verifies vsub_f32 lowers to an IR 'fsub <2 x float>' (expectations above).
test_vsub_f32(float32x2_t a,float32x2_t b)19423 float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) {
19424   return vsub_f32(a, b);
19425 }
19426
19427 // CHECK-LABEL: @test_vsub_u8(
19428 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
19429 // CHECK: ret <8 x i8> [[SUB_I]]
// Verifies vsub_u8 lowers to the same plain 'sub <8 x i8>' as the signed variant (expectations above).
test_vsub_u8(uint8x8_t a,uint8x8_t b)19430 uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) {
19431   return vsub_u8(a, b);
19432 }
19433
19434 // CHECK-LABEL: @test_vsub_u16(
19435 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
19436 // CHECK: ret <4 x i16> [[SUB_I]]
// Verifies vsub_u16 lowers to a plain 'sub <4 x i16>' (expectations above).
test_vsub_u16(uint16x4_t a,uint16x4_t b)19437 uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) {
19438   return vsub_u16(a, b);
19439 }
19440
19441 // CHECK-LABEL: @test_vsub_u32(
19442 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
19443 // CHECK: ret <2 x i32> [[SUB_I]]
// Verifies vsub_u32 lowers to a plain 'sub <2 x i32>' (expectations above).
test_vsub_u32(uint32x2_t a,uint32x2_t b)19444 uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) {
19445   return vsub_u32(a, b);
19446 }
19447
19448 // CHECK-LABEL: @test_vsub_u64(
19449 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
19450 // CHECK: ret <1 x i64> [[SUB_I]]
// Verifies vsub_u64 lowers to a plain 'sub <1 x i64>' (expectations above).
test_vsub_u64(uint64x1_t a,uint64x1_t b)19451 uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) {
19452   return vsub_u64(a, b);
19453 }
19454
19455 // CHECK-LABEL: @test_vsubq_s8(
19456 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
19457 // CHECK: ret <16 x i8> [[SUB_I]]
// Verifies the 128-bit vsubq_s8 lowers to a plain 'sub <16 x i8>' (expectations above).
test_vsubq_s8(int8x16_t a,int8x16_t b)19458 int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) {
19459   return vsubq_s8(a, b);
19460 }
19461
19462 // CHECK-LABEL: @test_vsubq_s16(
19463 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
19464 // CHECK: ret <8 x i16> [[SUB_I]]
// Verifies the 128-bit vsubq_s16 lowers to a plain 'sub <8 x i16>' (expectations above).
test_vsubq_s16(int16x8_t a,int16x8_t b)19465 int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) {
19466   return vsubq_s16(a, b);
19467 }
19468
19469 // CHECK-LABEL: @test_vsubq_s32(
19470 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
19471 // CHECK: ret <4 x i32> [[SUB_I]]
// Verifies the 128-bit vsubq_s32 lowers to a plain 'sub <4 x i32>' (expectations above).
test_vsubq_s32(int32x4_t a,int32x4_t b)19472 int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) {
19473   return vsubq_s32(a, b);
19474 }
19475
19476 // CHECK-LABEL: @test_vsubq_s64(
19477 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
19478 // CHECK: ret <2 x i64> [[SUB_I]]
// Verifies the 128-bit vsubq_s64 lowers to a plain 'sub <2 x i64>' (expectations above).
test_vsubq_s64(int64x2_t a,int64x2_t b)19479 int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) {
19480   return vsubq_s64(a, b);
19481 }
19482
19483 // CHECK-LABEL: @test_vsubq_f32(
19484 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b
19485 // CHECK: ret <4 x float> [[SUB_I]]
// Verifies the 128-bit vsubq_f32 lowers to an 'fsub <4 x float>' (expectations above).
test_vsubq_f32(float32x4_t a,float32x4_t b)19486 float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) {
19487   return vsubq_f32(a, b);
19488 }
19489
19490 // CHECK-LABEL: @test_vsubq_u8(
19491 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
19492 // CHECK: ret <16 x i8> [[SUB_I]]
// Verifies the 128-bit vsubq_u8 lowers to a plain 'sub <16 x i8>' (expectations above).
test_vsubq_u8(uint8x16_t a,uint8x16_t b)19493 uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) {
19494   return vsubq_u8(a, b);
19495 }
19496
19497 // CHECK-LABEL: @test_vsubq_u16(
19498 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
19499 // CHECK: ret <8 x i16> [[SUB_I]]
// Verifies the 128-bit vsubq_u16 lowers to a plain 'sub <8 x i16>' (expectations above).
test_vsubq_u16(uint16x8_t a,uint16x8_t b)19500 uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) {
19501   return vsubq_u16(a, b);
19502 }
19503
19504 // CHECK-LABEL: @test_vsubq_u32(
19505 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
19506 // CHECK: ret <4 x i32> [[SUB_I]]
// Verifies the 128-bit vsubq_u32 lowers to a plain 'sub <4 x i32>' (expectations above).
test_vsubq_u32(uint32x4_t a,uint32x4_t b)19507 uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) {
19508   return vsubq_u32(a, b);
19509 }
19510
19511 // CHECK-LABEL: @test_vsubq_u64(
19512 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
19513 // CHECK: ret <2 x i64> [[SUB_I]]
// Verifies the 128-bit vsubq_u64 lowers to a plain 'sub <2 x i64>' (expectations above).
test_vsubq_u64(uint64x2_t a,uint64x2_t b)19514 uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) {
19515   return vsubq_u64(a, b);
19516 }
19517
19518 // CHECK-LABEL: @test_vsubhn_s16(
19519 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
19520 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
19521 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
19522 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
19523 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
19524 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
// Subtract-high-narrow: expects sub, lshr by 8, then trunc to <8 x i8> (expectations above).
test_vsubhn_s16(int16x8_t a,int16x8_t b)19525 int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
19526   return vsubhn_s16(a, b);
19527 }
19528
19529 // CHECK-LABEL: @test_vsubhn_s32(
19530 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
19531 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
19532 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
19533 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
19534 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
19535 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
// Subtract-high-narrow: expects sub, lshr by 16, then trunc to <4 x i16> (expectations above).
test_vsubhn_s32(int32x4_t a,int32x4_t b)19536 int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
19537   return vsubhn_s32(a, b);
19538 }
19539
19540 // CHECK-LABEL: @test_vsubhn_s64(
19541 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
19542 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
19543 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
19544 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
19545 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
19546 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
// Subtract-high-narrow: expects sub, lshr by 32, then trunc to <2 x i32> (expectations above).
test_vsubhn_s64(int64x2_t a,int64x2_t b)19547 int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
19548   return vsubhn_s64(a, b);
19549 }
19550
19551 // CHECK-LABEL: @test_vsubhn_u16(
19552 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
19553 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
19554 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
19555 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
19556 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
19557 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
// Unsigned subtract-high-narrow: same sub/lshr-8/trunc sequence as the signed form (expectations above).
test_vsubhn_u16(uint16x8_t a,uint16x8_t b)19558 uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
19559   return vsubhn_u16(a, b);
19560 }
19561
19562 // CHECK-LABEL: @test_vsubhn_u32(
19563 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
19564 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
19565 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
19566 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
19567 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
19568 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
// Unsigned subtract-high-narrow: sub/lshr-16/trunc to <4 x i16> (expectations above).
test_vsubhn_u32(uint32x4_t a,uint32x4_t b)19569 uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
19570   return vsubhn_u32(a, b);
19571 }
19572
19573 // CHECK-LABEL: @test_vsubhn_u64(
19574 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
19575 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
19576 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
19577 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
19578 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
19579 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
// Unsigned subtract-high-narrow: sub/lshr-32/trunc to <2 x i32> (expectations above).
test_vsubhn_u64(uint64x2_t a,uint64x2_t b)19580 uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
19581   return vsubhn_u64(a, b);
19582 }
19583
19584 // CHECK-LABEL: @test_vsubl_s8(
19585 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
19586 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
19587 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19588 // CHECK: ret <8 x i16> [[SUB_I]]
// Widening subtract: both operands sext to <8 x i16> before the sub (expectations above).
test_vsubl_s8(int8x8_t a,int8x8_t b)19589 int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
19590   return vsubl_s8(a, b);
19591 }
19592
19593 // CHECK-LABEL: @test_vsubl_s16(
19594 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
19595 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
19596 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19597 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
19598 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19599 // CHECK: ret <4 x i32> [[SUB_I]]
// Widening subtract: both operands sext to <4 x i32> before the sub (expectations above).
test_vsubl_s16(int16x4_t a,int16x4_t b)19600 int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
19601   return vsubl_s16(a, b);
19602 }
19603
19604 // CHECK-LABEL: @test_vsubl_s32(
19605 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
19606 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
19607 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19608 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
19609 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19610 // CHECK: ret <2 x i64> [[SUB_I]]
// Widening subtract: both operands sext to <2 x i64> before the sub (expectations above).
test_vsubl_s32(int32x2_t a,int32x2_t b)19611 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
19612   return vsubl_s32(a, b);
19613 }
19614
19615 // CHECK-LABEL: @test_vsubl_u8(
19616 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
19617 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
19618 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19619 // CHECK: ret <8 x i16> [[SUB_I]]
// Unsigned widening subtract: both operands zext to <8 x i16> before the sub (expectations above).
test_vsubl_u8(uint8x8_t a,uint8x8_t b)19620 uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
19621   return vsubl_u8(a, b);
19622 }
19623
19624 // CHECK-LABEL: @test_vsubl_u16(
19625 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
19626 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
19627 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19628 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
19629 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19630 // CHECK: ret <4 x i32> [[SUB_I]]
// Unsigned widening subtract: both operands zext to <4 x i32> before the sub (expectations above).
test_vsubl_u16(uint16x4_t a,uint16x4_t b)19631 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
19632   return vsubl_u16(a, b);
19633 }
19634
19635 // CHECK-LABEL: @test_vsubl_u32(
19636 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
19637 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
19638 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19639 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
19640 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
19641 // CHECK: ret <2 x i64> [[SUB_I]]
// Unsigned widening subtract: both operands zext to <2 x i64> before the sub (expectations above).
test_vsubl_u32(uint32x2_t a,uint32x2_t b)19642 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
19643   return vsubl_u32(a, b);
19644 }
19645
19646 // CHECK-LABEL: @test_vsubw_s8(
19647 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
19648 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
19649 // CHECK: ret <8 x i16> [[SUB_I]]
// Wide subtract: only the narrow operand b is sext'd, then subtracted from wide a (expectations above).
test_vsubw_s8(int16x8_t a,int8x8_t b)19650 int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
19651   return vsubw_s8(a, b);
19652 }
19653
19654 // CHECK-LABEL: @test_vsubw_s16(
19655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19656 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
19657 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
19658 // CHECK: ret <4 x i32> [[SUB_I]]
// Wide subtract: b sext'd to <4 x i32>, then subtracted from wide a (expectations above).
test_vsubw_s16(int32x4_t a,int16x4_t b)19659 int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
19660   return vsubw_s16(a, b);
19661 }
19662
19663 // CHECK-LABEL: @test_vsubw_s32(
19664 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19665 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
19666 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
19667 // CHECK: ret <2 x i64> [[SUB_I]]
// Wide subtract: b sext'd to <2 x i64>, then subtracted from wide a (expectations above).
test_vsubw_s32(int64x2_t a,int32x2_t b)19668 int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
19669   return vsubw_s32(a, b);
19670 }
19671
19672 // CHECK-LABEL: @test_vsubw_u8(
19673 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
19674 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
19675 // CHECK: ret <8 x i16> [[SUB_I]]
// Unsigned wide subtract: b zext'd to <8 x i16>, then subtracted from wide a (expectations above).
test_vsubw_u8(uint16x8_t a,uint8x8_t b)19676 uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
19677   return vsubw_u8(a, b);
19678 }
19679
19680 // CHECK-LABEL: @test_vsubw_u16(
19681 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19682 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
19683 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
19684 // CHECK: ret <4 x i32> [[SUB_I]]
// Unsigned wide subtract: b zext'd to <4 x i32>, then subtracted from wide a (expectations above).
test_vsubw_u16(uint32x4_t a,uint16x4_t b)19685 uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
19686   return vsubw_u16(a, b);
19687 }
19688
19689 // CHECK-LABEL: @test_vsubw_u32(
19690 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19691 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
19692 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
19693 // CHECK: ret <2 x i64> [[SUB_I]]
// Unsigned wide subtract: b zext'd to <2 x i64>, then subtracted from wide a (expectations above).
test_vsubw_u32(uint64x2_t a,uint32x2_t b)19694 uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
19695   return vsubw_u32(a, b);
19696 }
19697
19698 // CHECK-LABEL: @test_vtbl1_u8(
19699 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
19700 // CHECK: ret <8 x i8> [[VTBL1_I]]
// Single-register table lookup: expects a call to llvm.arm.neon.vtbl1 (expectations above).
test_vtbl1_u8(uint8x8_t a,uint8x8_t b)19701 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
19702   return vtbl1_u8(a, b);
19703 }
19704
19705 // CHECK-LABEL: @test_vtbl1_s8(
19706 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
19707 // CHECK: ret <8 x i8> [[VTBL1_I]]
// Signed variant of the single-register table lookup; same llvm.arm.neon.vtbl1 call (expectations above).
test_vtbl1_s8(int8x8_t a,int8x8_t b)19708 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
19709   return vtbl1_s8(a, b);
19710 }
19711
19712 // CHECK-LABEL: @test_vtbl1_p8(
19713 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
19714 // CHECK: ret <8 x i8> [[VTBL1_I]]
// Polynomial variant of the single-register table lookup; same llvm.arm.neon.vtbl1 call (expectations above).
test_vtbl1_p8(poly8x8_t a,uint8x8_t b)19715 poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
19716   return vtbl1_p8(a, b);
19717 }
19718
19719 // CHECK-LABEL: @test_vtbl2_u8(
19720 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
19721 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
19722 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
19723 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19724 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
19725 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
19726 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
19727 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
19728 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
19729 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
19730 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
19731 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
19732 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19733 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19734 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
19735 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19736 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19737 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
19738 // CHECK: ret <8 x i8> [[VTBL2_I]]
// Two-register table lookup: the x2 struct is unpacked via allocas, then llvm.arm.neon.vtbl2 is called (expectations above).
test_vtbl2_u8(uint8x8x2_t a,uint8x8_t b)19739 uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
19740   return vtbl2_u8(a, b);
19741 }
19742
19743 // CHECK-LABEL: @test_vtbl2_s8(
19744 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
19745 // CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
19746 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
19747 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19748 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
19749 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
19750 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
19751 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
19752 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
19753 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
19754 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
19755 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
19756 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19757 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19758 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
19759 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19760 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19761 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
19762 // CHECK: ret <8 x i8> [[VTBL2_I]]
// Signed two-register table lookup; same struct-unpack then llvm.arm.neon.vtbl2 sequence (expectations above).
test_vtbl2_s8(int8x8x2_t a,int8x8_t b)19763 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
19764   return vtbl2_s8(a, b);
19765 }
19766
19767 // CHECK-LABEL: @test_vtbl2_p8(
19768 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
19769 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
19770 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
19771 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19772 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
19773 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
19774 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
19775 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
19776 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
19777 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
19778 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
19779 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
19780 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19781 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19782 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
19783 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19784 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19785 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
19786 // CHECK: ret <8 x i8> [[VTBL2_I]]
// Polynomial two-register table lookup; same struct-unpack then llvm.arm.neon.vtbl2 sequence (expectations above).
test_vtbl2_p8(poly8x8x2_t a,uint8x8_t b)19787 poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
19788   return vtbl2_p8(a, b);
19789 }
19790
19791 // CHECK-LABEL: @test_vtbl3_u8(
19792 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
19793 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
19794 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
19795 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19796 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
19797 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
19798 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
19799 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
19800 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19801 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
19802 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
19803 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19804 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19805 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19806 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19807 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19808 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19809 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
19810 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19811 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19812 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
19813 // CHECK: ret <8 x i8> [[VTBL3_I]]
// Three-register table lookup: the x3 struct is unpacked, then llvm.arm.neon.vtbl3 is called (expectations above).
test_vtbl3_u8(uint8x8x3_t a,uint8x8_t b)19814 uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
19815   return vtbl3_u8(a, b);
19816 }
19817
19818 // CHECK-LABEL: @test_vtbl3_s8(
19819 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
19820 // CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
19821 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
19822 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19823 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
19824 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
19825 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
19826 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
19827 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19828 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
19829 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
19830 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19831 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19832 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19833 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19834 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19835 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19836 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
19837 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19838 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19839 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
19840 // CHECK: ret <8 x i8> [[VTBL3_I]]
// Signed variant: the CHECK lines above verify the int8x8x3_t table struct is
// unpacked element-by-element and fed to a single @llvm.arm.neon.vtbl3 call.
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  return vtbl3_s8(a, b);
}
19844
19845 // CHECK-LABEL: @test_vtbl3_p8(
19846 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
19847 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
19848 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
19849 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
19850 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
19851 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
19852 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
19853 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
19854 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19855 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
19856 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
19857 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19858 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19859 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19860 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19861 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19862 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19863 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
19864 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19865 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19866 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
19867 // CHECK: ret <8 x i8> [[VTBL3_I]]
// Polynomial variant: identical lowering to the s8/u8 cases — the poly8x8x3_t
// table is unpacked and passed to one @llvm.arm.neon.vtbl3 call (CHECKs above).
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  return vtbl3_p8(a, b);
}
19871
19872 // CHECK-LABEL: @test_vtbl4_u8(
19873 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
19874 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
19875 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
19876 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19877 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
19878 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
19879 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
19880 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
19881 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19882 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
19883 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
19884 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19885 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19886 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19887 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19888 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19889 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19890 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19891 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19892 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19893 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
19894 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
19895 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
19896 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
19897 // CHECK: ret <8 x i8> [[VTBL4_I]]
// Four-table lookup, unsigned: CHECK lines above verify all four <8 x i8>
// vectors are loaded from the uint8x8x4_t and passed to @llvm.arm.neon.vtbl4.
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  return vtbl4_u8(a, b);
}
19901
19902 // CHECK-LABEL: @test_vtbl4_s8(
19903 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
19904 // CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
19905 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
19906 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19907 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
19908 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
19909 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
19910 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
19911 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19912 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
19913 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
19914 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19915 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19916 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19917 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19918 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19919 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19920 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19921 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19922 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19923 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
19924 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
19925 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
19926 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
19927 // CHECK: ret <8 x i8> [[VTBL4_I]]
// Four-table lookup, signed: same @llvm.arm.neon.vtbl4 lowering as the u8
// variant, checked against the IR in the CHECK block above.
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  return vtbl4_s8(a, b);
}
19931
19932 // CHECK-LABEL: @test_vtbl4_p8(
19933 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
19934 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
19935 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
19936 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19937 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
19938 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
19939 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
19940 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
19941 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19942 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
19943 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
19944 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19945 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19946 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19947 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19948 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
19949 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
19950 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19951 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
19952 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
19953 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
19954 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
19955 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
19956 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
19957 // CHECK: ret <8 x i8> [[VTBL4_I]]
// Four-table lookup, polynomial: verifies the same single-call
// @llvm.arm.neon.vtbl4 lowering for poly8x8x4_t tables (CHECK lines above).
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  return vtbl4_p8(a, b);
}
19961
19962 // CHECK-LABEL: @test_vtbx1_u8(
19963 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
19964 // CHECK: ret <8 x i8> [[VTBX1_I]]
// Single-table extended lookup: with plain vector args there is no struct
// unpacking — the CHECK above shows a direct @llvm.arm.neon.vtbx1(%a, %b, %c).
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vtbx1_u8(a, b, c);
}
19968
19969 // CHECK-LABEL: @test_vtbx1_s8(
19970 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
19971 // CHECK: ret <8 x i8> [[VTBX1_I]]
// Signed vtbx1: lowers to the same direct @llvm.arm.neon.vtbx1 call as the
// unsigned variant (see CHECK lines above).
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vtbx1_s8(a, b, c);
}
19975
19976 // CHECK-LABEL: @test_vtbx1_p8(
19977 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
19978 // CHECK: ret <8 x i8> [[VTBX1_I]]
// Polynomial vtbx1: direct @llvm.arm.neon.vtbx1 lowering, same as s8/u8
// (CHECK lines above).
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  return vtbx1_p8(a, b, c);
}
19982
19983 // CHECK-LABEL: @test_vtbx2_u8(
19984 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
19985 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
19986 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
19987 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
19988 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
19989 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
19990 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
19991 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
19992 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
19993 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
19994 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
19995 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
19996 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
19997 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
19998 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
19999 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20000 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20001 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20002 // CHECK: ret <8 x i8> [[VTBX2_I]]
// Two-table extended lookup: CHECKs above verify both vectors are unpacked
// from the uint8x8x2_t table arg into one @llvm.arm.neon.vtbx2 call.
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  return vtbx2_u8(a, b, c);
}
20006
20007 // CHECK-LABEL: @test_vtbx2_s8(
20008 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
20009 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
20010 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
20011 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20012 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20013 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
20014 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20015 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20016 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20017 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20018 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20019 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20020 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20021 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20022 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20023 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20024 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20025 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20026 // CHECK: ret <8 x i8> [[VTBX2_I]]
// Signed vtbx2: same struct-unpack plus single @llvm.arm.neon.vtbx2 call
// lowering as the unsigned variant (CHECK lines above).
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  return vtbx2_s8(a, b, c);
}
20030
20031 // CHECK-LABEL: @test_vtbx2_p8(
20032 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
20033 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
20034 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
20035 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20036 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20037 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
20038 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20039 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20040 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20041 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20042 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20043 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20044 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20045 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20046 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20047 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20048 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20049 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20050 // CHECK: ret <8 x i8> [[VTBX2_I]]
// Polynomial vtbx2: verifies identical @llvm.arm.neon.vtbx2 lowering for
// poly8x8x2_t tables (CHECK lines above).
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  return vtbx2_p8(a, b, c);
}
20054
20055 // CHECK-LABEL: @test_vtbx3_u8(
20056 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
20057 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
20058 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
20059 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20060 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20061 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
20062 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20063 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20064 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20065 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20066 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20067 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20068 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20069 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20070 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20071 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20072 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20073 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20074 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20075 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20076 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20077 // CHECK: ret <8 x i8> [[VTBX3_I]]
// Three-table extended lookup: the CHECK block above pins the unpacking of the
// uint8x8x3_t table and the single @llvm.arm.neon.vtbx3 call.
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}
20081
20082 // CHECK-LABEL: @test_vtbx3_s8(
20083 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
20084 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
20085 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
20086 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20087 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20088 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
20089 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20090 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20091 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20092 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20093 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20094 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20095 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20096 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20097 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20098 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20099 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20100 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20101 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20102 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20103 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20104 // CHECK: ret <8 x i8> [[VTBX3_I]]
// Signed vtbx3: same @llvm.arm.neon.vtbx3 lowering as the unsigned variant
// (CHECK lines above).
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  return vtbx3_s8(a, b, c);
}
20108
20109 // CHECK-LABEL: @test_vtbx3_p8(
20110 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
20111 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
20112 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
20113 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20114 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20115 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
20116 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20117 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20118 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20119 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20120 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20121 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20122 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20123 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20124 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20125 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20126 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20127 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20128 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20129 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20130 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20131 // CHECK: ret <8 x i8> [[VTBX3_I]]
// Polynomial vtbx3: verifies identical @llvm.arm.neon.vtbx3 lowering for
// poly8x8x3_t tables (CHECK lines above).
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  return vtbx3_p8(a, b, c);
}
20135
20136 // CHECK-LABEL: @test_vtbx4_u8(
20137 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
20138 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
20139 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
20140 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20141 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20142 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
20143 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20144 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20145 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20146 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20147 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20148 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20149 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20150 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20151 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20152 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20153 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20154 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20155 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20156 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20157 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20158 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20159 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20160 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20161 // CHECK: ret <8 x i8> [[VTBX4_I]]
// Four-table extended lookup: CHECKs above verify all four table vectors are
// loaded from the uint8x8x4_t and passed to one @llvm.arm.neon.vtbx4 call.
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
  return vtbx4_u8(a, b, c);
}
20165
20166 // CHECK-LABEL: @test_vtbx4_s8(
20167 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
20168 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
20169 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
20170 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20171 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20172 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
20173 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20174 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20175 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20176 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20177 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20178 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20179 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20180 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20181 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20182 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20183 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20184 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20185 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20186 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20187 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20188 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20189 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20190 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20191 // CHECK: ret <8 x i8> [[VTBX4_I]]
// Signed vtbx4: same @llvm.arm.neon.vtbx4 lowering as the unsigned variant
// (CHECK lines above).
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
  return vtbx4_s8(a, b, c);
}
20195
20196 // CHECK-LABEL: @test_vtbx4_p8(
20197 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
20198 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
20199 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
20200 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20201 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20202 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
20203 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20204 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20205 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20206 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20207 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20208 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20209 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20210 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20211 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20212 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20213 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20214 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20215 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20216 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20217 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20218 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20219 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20220 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20221 // CHECK: ret <8 x i8> [[VTBX4_I]]
// Codegen test: vtbx4_p8 must unpack the poly8x8x4_t table into four <8 x i8>
// halves and call @llvm.arm.neon.vtbx4 (see CHECK lines above).
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
  return vtbx4_p8(a, b, c);
}
20225
20226 // CHECK: @test_vtrn_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20227 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
20228 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20229 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20230 // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !3
20231 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20232 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20233 // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !3
20234 // CHECK: ret void
// Codegen test: vtrn_s8 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
  return vtrn_s8(a, b);
}
20238
20239 // CHECK: @test_vtrn_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20240 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
20241 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20242 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20243 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20244 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20245 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !6
20246 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20247 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20248 // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !6
20249 // CHECK: ret void
// Codegen test: vtrn_s16 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
  return vtrn_s16(a, b);
}
20253
20254 // CHECK: @test_vtrn_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20255 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20256 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20257 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20258 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20259 // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20260 // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !9
20261 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20262 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20263 // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !9
20264 // CHECK: ret void
// Codegen test: vtrn_s32 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
  return vtrn_s32(a, b);
}
20268
20269 // CHECK: @test_vtrn_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20270 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
20271 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20272 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20273 // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !12
20274 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20275 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20276 // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !12
20277 // CHECK: ret void
// Codegen test: vtrn_u8 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
  return vtrn_u8(a, b);
}
20281
20282 // CHECK: @test_vtrn_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20283 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
20284 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20285 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20286 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20287 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20288 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !15
20289 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20290 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20291 // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !15
20292 // CHECK: ret void
// Codegen test: vtrn_u16 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
  return vtrn_u16(a, b);
}
20296
20297 // CHECK: @test_vtrn_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20298 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
20299 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20300 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20301 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20302 // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20303 // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !18
20304 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20305 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20306 // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !18
20307 // CHECK: ret void
// Codegen test: vtrn_u32 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
  return vtrn_u32(a, b);
}
20311
20312 // CHECK: @test_vtrn_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20313 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
20314 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
20315 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
20316 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
20317 // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
20318 // CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !21
20319 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
20320 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
20321 // CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !21
20322 // CHECK: ret void
// Codegen test: vtrn_f32 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
  return vtrn_f32(a, b);
}
20326
20327 // CHECK: @test_vtrn_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20328 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
20329 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20330 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20331 // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !24
20332 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20333 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20334 // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !24
20335 // CHECK: ret void
// Codegen test: vtrn_p8 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
  return vtrn_p8(a, b);
}
20339
20340 // CHECK: @test_vtrn_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20341 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
20342 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20343 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20344 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20345 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20346 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !27
20347 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20348 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20349 // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !27
20350 // CHECK: ret void
// Codegen test: vtrn_p16 must lower to two transposing shufflevectors stored
// through the sret result (see CHECK lines above).
poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
  return vtrn_p16(a, b);
}
20354
20355 // CHECK: @test_vtrnq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20356 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
20357 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20358 // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
20359 // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !30
20360 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20361 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
20362 // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !30
20363 // CHECK: ret void
// Codegen test: quad-register vtrnq_s8 must lower to two 16-lane transposing
// shufflevectors stored through the sret result (see CHECK lines above).
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
  return vtrnq_s8(a, b);
}
20367
20368 // CHECK: @test_vtrnq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20369 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
20370 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20371 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20372 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20373 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20374 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !33
20375 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20376 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20377 // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !33
20378 // CHECK: ret void
// Codegen test: quad-register vtrnq_s16 must lower to two transposing
// shufflevectors stored through the sret result (see CHECK lines above).
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
  return vtrnq_s16(a, b);
}
20382
20383 // CHECK: @test_vtrnq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20384 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
20385 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20386 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20387 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20388 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20389 // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !36
20390 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20391 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20392 // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !36
20393 // CHECK: ret void
// Codegen test: quad-register vtrnq_s32 must lower to two transposing
// shufflevectors stored through the sret result (see CHECK lines above).
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
  return vtrnq_s32(a, b);
}
20397
20398 // CHECK: @test_vtrnq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20399 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
20400 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20401 // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
20402 // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !39
20403 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20404 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
20405 // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !39
20406 // CHECK: ret void
// Codegen test: quad-register vtrnq_u8 must lower to two 16-lane transposing
// shufflevectors stored through the sret result (see CHECK lines above).
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
  return vtrnq_u8(a, b);
}
20410
20411 // CHECK: @test_vtrnq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20412 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
20413 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20414 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20415 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20416 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20417 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !42
20418 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20419 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20420 // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !42
20421 // CHECK: ret void
// Codegen test: quad-register vtrnq_u16 must lower to two transposing
// shufflevectors stored through the sret result (see CHECK lines above).
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
  return vtrnq_u16(a, b);
}
20425
20426 // CHECK: @test_vtrnq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20427 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
20428 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20429 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20430 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20431 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20432 // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !45
20433 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20434 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20435 // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !45
20436 // CHECK: ret void
// Codegen test: quad-register vtrnq_u32 must lower to two transposing
// shufflevectors stored through the sret result (see CHECK lines above).
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
  return vtrnq_u32(a, b);
}
20440
20441 // CHECK: @test_vtrnq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20442 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
20443 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
20444 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
20445 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
20446 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20447 // CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !48
20448 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
20449 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20450 // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !48
20451 // CHECK: ret void
// Codegen test: quad-register vtrnq_f32 must lower to two transposing
// shufflevectors stored through the sret result (see CHECK lines above).
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
  return vtrnq_f32(a, b);
}
20455
20456 // CHECK: @test_vtrnq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20457 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
20458 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20459 // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
20460 // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !51
20461 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20462 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
20463 // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !51
20464 // CHECK: ret void
// Codegen test: quad-register vtrnq_p8 must lower to two 16-lane transposing
// shufflevectors stored through the sret result (see CHECK lines above).
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
  return vtrnq_p8(a, b);
}
20468
20469 // CHECK: @test_vtrnq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20470 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
20471 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20472 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20473 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20474 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20475 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !54
20476 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20477 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20478 // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !54
20479 // CHECK: ret void
// Codegen test: quad-register vtrnq_p16 must lower to two transposing
// shufflevectors stored through the sret result (see CHECK lines above).
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
  return vtrnq_p16(a, b);
}
20483
20484 // CHECK-LABEL: @test_vtst_s8(
20485 // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
20486 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
20487 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
20488 // CHECK: ret <8 x i8> [[VTST_I]]
// Codegen test: vtst_s8 must lower to `and` + `icmp ne 0` + `sext` with no
// intrinsic call (see CHECK lines above).
uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
  return vtst_s8(a, b);
}
20492
20493 // CHECK-LABEL: @test_vtst_s16(
20494 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20495 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20496 // CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
20497 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
20498 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
20499 // CHECK: ret <4 x i16> [[VTST_I]]
// Codegen test: vtst_s16 must lower to `and` + `icmp ne 0` + `sext`
// (see CHECK lines above).
uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
  return vtst_s16(a, b);
}
20503
20504 // CHECK-LABEL: @test_vtst_s32(
20505 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20506 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20507 // CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
20508 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
20509 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
20510 // CHECK: ret <2 x i32> [[VTST_I]]
// Codegen test: vtst_s32 must lower to `and` + `icmp ne 0` + `sext`
// (see CHECK lines above).
uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
  return vtst_s32(a, b);
}
20514
20515 // CHECK-LABEL: @test_vtst_u8(
20516 // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
20517 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
20518 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
20519 // CHECK: ret <8 x i8> [[VTST_I]]
// Codegen test: vtst_u8 must lower to `and` + `icmp ne 0` + `sext`
// (see CHECK lines above).
uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
  return vtst_u8(a, b);
}
20523
20524 // CHECK-LABEL: @test_vtst_u16(
20525 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20526 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20527 // CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
20528 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
20529 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
20530 // CHECK: ret <4 x i16> [[VTST_I]]
// Codegen test: vtst_u16 must lower to `and` + `icmp ne 0` + `sext`
// (see CHECK lines above).
uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
  return vtst_u16(a, b);
}
20534
20535 // CHECK-LABEL: @test_vtst_u32(
20536 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20537 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20538 // CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
20539 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
20540 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
20541 // CHECK: ret <2 x i32> [[VTST_I]]
// Codegen test: vtst_u32 must lower to `and` + `icmp ne 0` + `sext`
// (see CHECK lines above).
uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
  return vtst_u32(a, b);
}
20545
20546 // CHECK-LABEL: @test_vtst_p8(
20547 // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
20548 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
20549 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
20550 // CHECK: ret <8 x i8> [[VTST_I]]
// Codegen test: vtst_p8 must lower to `and` + `icmp ne 0` + `sext`
// (see CHECK lines above).
uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
  return vtst_p8(a, b);
}
20554
20555 // CHECK-LABEL: @test_vtst_p16(
20556 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20557 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20558 // CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
20559 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
20560 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
20561 // CHECK: ret <4 x i16> [[VTST_I]]
// Codegen test: vtst_p16 must lower to `and` + `icmp ne 0` + `sext`
// (see CHECK lines above).
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
  return vtst_p16(a, b);
}
20565
20566 // CHECK-LABEL: @test_vtstq_s8(
20567 // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
20568 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
20569 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
20570 // CHECK: ret <16 x i8> [[VTST_I]]
// Codegen test: quad-register vtstq_s8 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
  return vtstq_s8(a, b);
}
20574
20575 // CHECK-LABEL: @test_vtstq_s16(
20576 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20577 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20578 // CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
20579 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
20580 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
20581 // CHECK: ret <8 x i16> [[VTST_I]]
// Codegen test: quad-register vtstq_s16 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
  return vtstq_s16(a, b);
}
20585
20586 // CHECK-LABEL: @test_vtstq_s32(
20587 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20588 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20589 // CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
20590 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
20591 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
20592 // CHECK: ret <4 x i32> [[VTST_I]]
// Codegen test: quad-register vtstq_s32 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
  return vtstq_s32(a, b);
}
20596
20597 // CHECK-LABEL: @test_vtstq_u8(
20598 // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
20599 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
20600 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
20601 // CHECK: ret <16 x i8> [[VTST_I]]
// Codegen test: quad-register vtstq_u8 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
  return vtstq_u8(a, b);
}
20605
20606 // CHECK-LABEL: @test_vtstq_u16(
20607 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20608 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20609 // CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
20610 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
20611 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
20612 // CHECK: ret <8 x i16> [[VTST_I]]
// Codegen test: quad-register vtstq_u16 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
  return vtstq_u16(a, b);
}
20616
20617 // CHECK-LABEL: @test_vtstq_u32(
20618 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20619 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20620 // CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
20621 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
20622 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
20623 // CHECK: ret <4 x i32> [[VTST_I]]
// Codegen test: quad-register vtstq_u32 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
  return vtstq_u32(a, b);
}
20627
20628 // CHECK-LABEL: @test_vtstq_p8(
20629 // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
20630 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
20631 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
20632 // CHECK: ret <16 x i8> [[VTST_I]]
// Codegen test: quad-register vtstq_p8 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
  return vtstq_p8(a, b);
}
20636
20637 // CHECK-LABEL: @test_vtstq_p16(
20638 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20639 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20640 // CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
20641 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
20642 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
20643 // CHECK: ret <8 x i16> [[VTST_I]]
// Codegen test: quad-register vtstq_p16 must lower to `and` + `icmp ne 0` +
// `sext` (see CHECK lines above).
uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
  return vtstq_p16(a, b);
}
20647
20648 // CHECK: @test_vuzp_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20649 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
20650 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20651 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20652 // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !57
20653 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20654 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20655 // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !57
20656 // CHECK: ret void
// Codegen test: vuzp_s8 must lower to two de-interleaving (even/odd lane)
// shufflevectors stored through the sret result (see CHECK lines above).
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
  return vuzp_s8(a, b);
}
20660
20661 // CHECK: @test_vuzp_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20662 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
20663 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20664 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20665 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20666 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20667 // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !60
20668 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20669 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20670 // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !60
20671 // CHECK: ret void
// Codegen test: vuzp_s16 must lower to two de-interleaving shufflevectors
// stored through the sret result (see CHECK lines above).
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
  return vuzp_s16(a, b);
}
20675
20676 // CHECK: @test_vuzp_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20677 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20678 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20679 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20680 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20681 // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20682 // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !63
20683 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20684 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20685 // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !63
20686 // CHECK: ret void
// Codegen test: vuzp_s32 must lower to two de-interleaving shufflevectors
// stored through the sret result (see CHECK lines above).
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
  return vuzp_s32(a, b);
}
20690
20691 // CHECK: @test_vuzp_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20692 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
20693 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20694 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20695 // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !66
20696 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20697 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20698 // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !66
20699 // CHECK: ret void
// Codegen test: vuzp_u8 must lower to two de-interleaving shufflevectors
// stored through the sret result (see CHECK lines above).
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp_u8(a, b);
}
20703
20704 // CHECK: @test_vuzp_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20705 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
20706 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20707 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20708 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20709 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20710 // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !69
20711 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20712 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20713 // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !69
20714 // CHECK: ret void
// vuzp_u16: autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp_u16(a, b);
}
20718
20719 // CHECK: @test_vuzp_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20720 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
20721 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20722 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20723 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20724 // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20725 // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !72
20726 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20727 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20728 // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !72
20729 // CHECK: ret void
// vuzp_u32: autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp_u32(a, b);
}
20733
20734 // CHECK: @test_vuzp_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20735 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
20736 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
20737 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
20738 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
20739 // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
20740 // CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !75
20741 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
20742 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
20743 // CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !75
20744 // CHECK: ret void
// vuzp_f32: autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
  return vuzp_f32(a, b);
}
20748
20749 // CHECK: @test_vuzp_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20750 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
20751 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20752 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20753 // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !78
20754 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20755 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20756 // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !78
20757 // CHECK: ret void
// vuzp_p8: autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp_p8(a, b);
}
20761
20762 // CHECK: @test_vuzp_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20763 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
20764 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20765 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20766 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20767 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20768 // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !81
20769 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20770 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20771 // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !81
20772 // CHECK: ret void
// vuzp_p16: autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp_p16(a, b);
}
20776
20777 // CHECK: @test_vuzpq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20778 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
20779 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20780 // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
20781 // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !84
20782 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20783 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
20784 // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !84
20785 // CHECK: ret void
// vuzpq_s8 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}
20789
20790 // CHECK: @test_vuzpq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20791 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
20792 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20793 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20794 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20795 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20796 // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !87
20797 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20798 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20799 // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !87
20800 // CHECK: ret void
// vuzpq_s16 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}
20804
20805 // CHECK: @test_vuzpq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20806 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
20807 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20808 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20809 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20810 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20811 // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !90
20812 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20813 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20814 // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !90
20815 // CHECK: ret void
// vuzpq_s32 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}
20819
20820 // CHECK: @test_vuzpq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20821 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
20822 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20823 // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
20824 // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !93
20825 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20826 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
20827 // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !93
20828 // CHECK: ret void
// vuzpq_u8 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}
20832
20833 // CHECK: @test_vuzpq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20834 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
20835 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20836 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20837 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20838 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20839 // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !96
20840 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20841 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20842 // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !96
20843 // CHECK: ret void
// vuzpq_u16 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}
20847
20848 // CHECK: @test_vuzpq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20849 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
20850 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20851 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20852 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20853 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20854 // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !99
20855 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20856 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20857 // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !99
20858 // CHECK: ret void
// vuzpq_u32 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}
20862
20863 // CHECK: @test_vuzpq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20864 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
20865 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
20866 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
20867 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
20868 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20869 // CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !102
20870 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
20871 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20872 // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !102
20873 // CHECK: ret void
// vuzpq_f32 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
  return vuzpq_f32(a, b);
}
20877
20878 // CHECK: @test_vuzpq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20879 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
20880 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20881 // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
20882 // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !105
20883 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20884 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
20885 // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !105
20886 // CHECK: ret void
// vuzpq_p8 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
  return vuzpq_p8(a, b);
}
20890
20891 // CHECK: @test_vuzpq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20892 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
20893 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20894 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20895 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20896 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20897 // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !108
20898 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20899 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20900 // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !108
20901 // CHECK: ret void
// vuzpq_p16 (128-bit): autogenerated CHECKs above pin the even/odd-lane shufflevector pair stored via sret.
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
  return vuzpq_p16(a, b);
}
20905
20906 // CHECK: @test_vzip_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20907 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
20908 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20909 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
20910 // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !111
20911 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20912 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
20913 // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !111
20914 // CHECK: ret void
// vzip_s8: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
  return vzip_s8(a, b);
}
20918
20919 // CHECK: @test_vzip_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20920 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
20921 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20922 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20923 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20924 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
20925 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !114
20926 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20927 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
20928 // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !114
20929 // CHECK: ret void
// vzip_s16: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
  return vzip_s16(a, b);
}
20933
20934 // CHECK: @test_vzip_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20935 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20936 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20937 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20938 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20939 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20940 // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !117
20941 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20942 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20943 // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !117
20944 // CHECK: ret void
// vzip_s32: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
  return vzip_s32(a, b);
}
20948
20949 // CHECK: @test_vzip_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20950 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
20951 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20952 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
20953 // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !120
20954 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20955 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
20956 // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !120
20957 // CHECK: ret void
// vzip_u8: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
  return vzip_u8(a, b);
}
20961
20962 // CHECK: @test_vzip_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20963 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
20964 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20965 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20966 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20967 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
20968 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !123
20969 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20970 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
20971 // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !123
20972 // CHECK: ret void
// vzip_u16: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
  return vzip_u16(a, b);
}
20976
20977 // CHECK: @test_vzip_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20978 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
20979 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20980 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20981 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20982 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20983 // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !126
20984 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20985 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20986 // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !126
20987 // CHECK: ret void
// vzip_u32: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
  return vzip_u32(a, b);
}
20991
20992 // CHECK: @test_vzip_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20993 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
20994 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
20995 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
20996 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
20997 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
20998 // CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !129
20999 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
21000 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
21001 // CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !129
21002 // CHECK: ret void
// vzip_f32: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
  return vzip_f32(a, b);
}
21006
21007 // CHECK: @test_vzip_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21008 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
21009 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
21010 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21011 // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !132
21012 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
21013 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21014 // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !132
21015 // CHECK: ret void
// vzip_p8: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
  return vzip_p8(a, b);
}
21019
21020 // CHECK: @test_vzip_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21021 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
21022 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
21023 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
21024 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
21025 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21026 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !135
21027 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
21028 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21029 // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !135
21030 // CHECK: ret void
// vzip_p16: autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
  return vzip_p16(a, b);
}
21034
21035 // CHECK: @test_vzipq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21036 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
21037 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
21038 // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
21039 // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !138
21040 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
21041 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
21042 // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !138
21043 // CHECK: ret void
// vzipq_s8 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
  return vzipq_s8(a, b);
}
21047
21048 // CHECK: @test_vzipq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21049 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
21050 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21051 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
21052 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
21053 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21054 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !141
21055 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
21056 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21057 // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !141
21058 // CHECK: ret void
// vzipq_s16 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
  return vzipq_s16(a, b);
}
21062
21063 // CHECK: @test_vzipq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21064 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
21065 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
21066 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
21067 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
21068 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21069 // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !144
21070 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
21071 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21072 // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !144
21073 // CHECK: ret void
// vzipq_s32 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
  return vzipq_s32(a, b);
}
21077
21078 // CHECK: @test_vzipq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21079 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
21080 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
21081 // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
21082 // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !147
21083 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
21084 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
21085 // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !147
21086 // CHECK: ret void
// vzipq_u8 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
  return vzipq_u8(a, b);
}
21090
21091 // CHECK: @test_vzipq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21092 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
21093 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21094 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
21095 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
21096 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21097 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !150
21098 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
21099 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21100 // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !150
21101 // CHECK: ret void
// vzipq_u16 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
  return vzipq_u16(a, b);
}
21105
21106 // CHECK: @test_vzipq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21107 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
21108 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
21109 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
21110 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
21111 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21112 // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !153
21113 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
21114 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21115 // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !153
21116 // CHECK: ret void
// vzipq_u32 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
  return vzipq_u32(a, b);
}
21120
21121 // CHECK: @test_vzipq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21122 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
21123 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
21124 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
21125 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
21126 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21127 // CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !156
21128 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
21129 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
21130 // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !156
21131 // CHECK: ret void
// vzipq_f32 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
  return vzipq_f32(a, b);
}
21135
21136 // CHECK: @test_vzipq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21137 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
21138 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
21139 // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
21140 // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !159
21141 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
21142 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
21143 // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !159
21144 // CHECK: ret void
// vzipq_p8 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
  return vzipq_p8(a, b);
}
21148
21149 // CHECK: @test_vzipq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
21150 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
21151 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
21152 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
21153 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
21154 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
21155 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !162
21156 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
21157 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
21158 // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !162
21159 // CHECK: ret void
// vzipq_p16 (128-bit): autogenerated CHECKs above pin the low/high-half interleaving shufflevector pair stored via sret.
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
  return vzipq_p16(a, b);
}
21163