// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
// RUN:  -target-cpu swift -fallow-half-arguments-and-returns \
// RUN:  -target-feature +fullfp16 -ffreestanding \
// RUN:  -flax-vector-conversions=none \
// RUN:  -disable-O0-optnone -emit-llvm -o - %s \
// RUN:  | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>

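// vaba/vabaq: absolute difference and accumulate. Codegen is a vabds/vabdu
// intrinsic call followed by a plain IR add into the accumulator %a.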
// CHECK-LABEL: @test_vaba_s8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vaba_s8(a, b, c);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vaba_s16(a, b, c);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vaba_s32(a, b, c);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vaba_u8(a, b, c);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vaba_u16(a, b, c);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vaba_u32(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s8(
// CHECK:   [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vabaq_s8(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vabaq_s16(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vabaq_s32(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK:   [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vabaq_u8(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vabaq_u16(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vabaq_u32(a, b, c);
}

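// vabal: widening absolute difference and accumulate. The narrow difference is
// zero-extended (its magnitude is non-negative even for signed inputs) before the add.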
// CHECK-LABEL: @test_vabal_s8(
// CHECK:   [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_u8(
// CHECK:   [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

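// vabd/vabdq: per-lane absolute difference (llvm.arm.neon.vabds for signed and
// float, vabdu for unsigned).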
// CHECK-LABEL: @test_vabd_s8(
// CHECK:   [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VABD_V_I]]
int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
  return vabd_s8(a, b);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VABD_V2_I]]
int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
  return vabd_s16(a, b);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VABD_V2_I]]
int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
  return vabd_s32(a, b);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK:   [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VABD_V_I]]
uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
  return vabd_u8(a, b);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VABD_V2_I]]
uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
  return vabd_u16(a, b);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VABD_V2_I]]
uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
  return vabd_u32(a, b);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VABD_V2_I]]
float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
  return vabd_f32(a, b);
}

// CHECK-LABEL: @test_vabdq_s8(
// CHECK:   [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VABDQ_V_I]]
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
  return vabdq_s8(a, b);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VABDQ_V2_I]]
int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
  return vabdq_s16(a, b);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VABDQ_V2_I]]
int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
  return vabdq_s32(a, b);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK:   [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VABDQ_V_I]]
uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
  return vabdq_u8(a, b);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VABDQ_V2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
  return vabdq_u16(a, b);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VABDQ_V2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
  return vabdq_u32(a, b);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VABDQ_V2_I]]
float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
  return vabdq_f32(a, b);
}

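// vabdl: widening absolute difference; the narrow vabd result is zero-extended
// to the double-width element type.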
// CHECK-LABEL: @test_vabdl_s8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_u8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}

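// vabs/vabsq: per-lane absolute value; integers use llvm.arm.neon.vabs, floats
// lower to the generic llvm.fabs intrinsic.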
// CHECK-LABEL: @test_vabs_s8(
// CHECK:   [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VABS_I]]
int8x8_t test_vabs_s8(int8x8_t a) {
  return vabs_s8(a);
}

// CHECK-LABEL: @test_vabs_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a)
// CHECK:   ret <4 x i16> [[VABS1_I]]
int16x4_t test_vabs_s16(int16x4_t a) {
  return vabs_s16(a);
}

// CHECK-LABEL: @test_vabs_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VABS1_I]]
int32x2_t test_vabs_s32(int32x2_t a) {
  return vabs_s32(a);
}

// CHECK-LABEL: @test_vabs_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
// CHECK:   ret <2 x float> [[VABS1_I]]
float32x2_t test_vabs_f32(float32x2_t a) {
  return vabs_f32(a);
}

// CHECK-LABEL: @test_vabsq_s8(
// CHECK:   [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VABS_I]]
int8x16_t test_vabsq_s8(int8x16_t a) {
  return vabsq_s8(a);
}

// CHECK-LABEL: @test_vabsq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a)
// CHECK:   ret <8 x i16> [[VABS1_I]]
int16x8_t test_vabsq_s16(int16x8_t a) {
  return vabsq_s16(a);
}

// CHECK-LABEL: @test_vabsq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VABS1_I]]
int32x4_t test_vabsq_s32(int32x4_t a) {
  return vabsq_s32(a);
}

// CHECK-LABEL: @test_vabsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
// CHECK:   ret <4 x float> [[VABS1_I]]
float32x4_t test_vabsq_f32(float32x4_t a) {
  return vabsq_f32(a);
}

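// vadd/vaddq: plain lane-wise addition, lowered directly to IR add/fadd.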
// CHECK-LABEL: @test_vadd_s8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
  return vadd_s8(a, b);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
  return vadd_s16(a, b);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
  return vadd_s32(a, b);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
  return vadd_s64(a, b);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, %b
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
  return vadd_f32(a, b);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
  return vadd_u8(a, b);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
  return vadd_u16(a, b);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
  return vadd_u32(a, b);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
  return vadd_u64(a, b);
}

// CHECK-LABEL: @test_vaddq_s8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
  return vaddq_s8(a, b);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
  return vaddq_s16(a, b);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
  return vaddq_s32(a, b);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
  return vaddq_s64(a, b);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, %b
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
  return vaddq_f32(a, b);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vaddq_u8(a, b);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vaddq_u16(a, b);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vaddq_u32(a, b);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vaddq_u64(a, b);
}

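// vaddhn: add and narrow to the high half of each lane, emitted as
// add + lshr (by half the lane width) + trunc.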
// CHECK-LABEL: @test_vaddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: @test_vaddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: @test_vaddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: @test_vaddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: @test_vaddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: @test_vaddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

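// vaddl: widening add; both narrow operands are sign- or zero-extended to the
// double-width type before the add.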
// CHECK-LABEL: @test_vaddl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

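// vaddw: widening add of a wide accumulator and a narrow operand; only the
// narrow operand %b is extended.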
// CHECK-LABEL: @test_vaddw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

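// vand/vandq: bitwise AND, lowered directly to the IR and instruction.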
// CHECK-LABEL: @test_vand_s8(
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[AND_I]]
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
  return vand_s8(a, b);
}

// CHECK-LABEL: @test_vand_s16(
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[AND_I]]
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
  return vand_s16(a, b);
}

// CHECK-LABEL: @test_vand_s32(
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[AND_I]]
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
  return vand_s32(a, b);
}

// CHECK-LABEL: @test_vand_s64(
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[AND_I]]
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
  return vand_s64(a, b);
}

// CHECK-LABEL: @test_vand_u8(
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[AND_I]]
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
  return vand_u8(a, b);
}

// CHECK-LABEL: @test_vand_u16(
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[AND_I]]
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
  return vand_u16(a, b);
}

// CHECK-LABEL: @test_vand_u32(
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[AND_I]]
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
  return vand_u32(a, b);
}

// CHECK-LABEL: @test_vand_u64(
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[AND_I]]
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
  return vand_u64(a, b);
}

// CHECK-LABEL: @test_vandq_s8(
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
  return vandq_s8(a, b);
}

// CHECK-LABEL: @test_vandq_s16(
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
  return vandq_s16(a, b);
}

// CHECK-LABEL: @test_vandq_s32(
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
  return vandq_s32(a, b);
}

// CHECK-LABEL: @test_vandq_s64(
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
  return vandq_s64(a, b);
}

// CHECK-LABEL: @test_vandq_u8(
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
  return vandq_u8(a, b);
}

// CHECK-LABEL: @test_vandq_u16(
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
  return vandq_u16(a, b);
}

// CHECK-LABEL: @test_vandq_u32(
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
  return vandq_u32(a, b);
}

// CHECK-LABEL: @test_vandq_u64(
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
  return vandq_u64(a, b);
}

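// vbic/vbicq: bit clear, a & ~b, emitted as xor with all-ones followed by and.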
// CHECK-LABEL: @test_vbic_s8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK:   ret <8 x i8> [[AND_I]]
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
  return vbic_s8(a, b);
}

// CHECK-LABEL: @test_vbic_s16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK:   ret <4 x i16> [[AND_I]]
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
  return vbic_s16(a, b);
}

// CHECK-LABEL: @test_vbic_s32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK:   ret <2 x i32> [[AND_I]]
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
  return vbic_s32(a, b);
}

// CHECK-LABEL: @test_vbic_s64(
// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK:   ret <1 x i64> [[AND_I]]
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
  return vbic_s64(a, b);
}

// CHECK-LABEL: @test_vbic_u8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK:   ret <8 x i8> [[AND_I]]
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
  return vbic_u8(a, b);
}

// CHECK-LABEL: @test_vbic_u16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK:   ret <4 x i16> [[AND_I]]
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
  return vbic_u16(a, b);
}

// CHECK-LABEL: @test_vbic_u32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK:   ret <2 x i32> [[AND_I]]
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
  return vbic_u32(a, b);
}

// CHECK-LABEL: @test_vbic_u64(
// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK:   ret <1 x i64> [[AND_I]]
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
  return vbic_u64(a, b);
}

// CHECK-LABEL: @test_vbicq_s8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK:   ret <16 x i8> [[AND_I]]
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
  return vbicq_s8(a, b);
}

// CHECK-LABEL: @test_vbicq_s16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK:   ret <8 x i16> [[AND_I]]
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
  return vbicq_s16(a, b);
}

// CHECK-LABEL: @test_vbicq_s32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK:   ret <4 x i32> [[AND_I]]
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
  return vbicq_s32(a, b);
}

// CHECK-LABEL: @test_vbicq_s64(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK:   ret <2 x i64> [[AND_I]]
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
  return vbicq_s64(a, b);
}

// CHECK-LABEL: @test_vbicq_u8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK:   ret <16 x i8> [[AND_I]]
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
  return vbicq_u8(a, b);
}

// CHECK-LABEL: @test_vbicq_u16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK:   ret <8 x i16> [[AND_I]]
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
  return vbicq_u16(a, b);
}

// CHECK-LABEL: @test_vbicq_u32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK:   ret <4 x i32> [[AND_I]]
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
  return vbicq_u32(a, b);
}

// CHECK-LABEL: @test_vbicq_u64(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK:   ret <2 x i64> [[AND_I]]
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
  return vbicq_u64(a, b);
}

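// vbsl/vbslq: bitwise select via llvm.arm.neon.vbsl; non-byte element types are
// bitcast to <8 x i8>/<16 x i8> around the call.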
// CHECK-LABEL: @test_vbsl_s8(
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VBSL_V_I]]
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
  return vbsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP3]]
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
  return vbsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP3]]
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
  return vbsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP3]]
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
  return vbsl_s64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VBSL_V_I]]
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vbsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP3]]
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vbsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP3]]
uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vbsl_u32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP3]]
uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
  return vbsl_u64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
  return vbsl_f32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VBSL_V_I]]
poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
  return vbsl_p8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP3]]
poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
  return vbsl_p16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s8(
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK:   ret <16 x i8> [[VBSLQ_V_I]]
int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
  return vbslq_s8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP3]]
int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
  return vbslq_s16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP3]]
int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
  return vbslq_s32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP3]]
int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
  return vbslq_s64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK:   ret <16 x i8> [[VBSLQ_V_I]]
uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vbslq_u8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP3]]
uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vbslq_u16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP3]]
uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vbslq_u32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP3]]
uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  return vbslq_u64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
  return vbslq_f32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK:   ret <16 x i8> [[VBSLQ_V_I]]
poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
  return vbslq_p8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP3]]
poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
1215   return vbslq_p16(a, b, c);
1216 }
1217 
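// vbsl (bitwise select) picks bits from the second operand where the mask
// (first operand) is set and from the third where it is clear, i.e.
// (a & b) | (~a & c). A scalar sketch of the same selection, illustrative
// only and not exercised by any CHECK line:
//   static inline uint8_t bsl_scalar(uint8_t mask, uint8_t b, uint8_t c) {
//     return (uint8_t)((mask & b) | (~mask & c));
//   }
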
// CHECK-LABEL: @test_vcage_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
  return vcage_f32(a, b);
}

// CHECK-LABEL: @test_vcageq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
  return vcageq_f32(a, b);
}

// CHECK-LABEL: @test_vcagt_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
  return vcagt_f32(a, b);
}

// CHECK-LABEL: @test_vcagtq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
  return vcagtq_f32(a, b);
}

// CHECK-LABEL: @test_vcale_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK:   ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
  return vcale_f32(a, b);
}

// CHECK-LABEL: @test_vcaleq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK:   ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
  return vcaleq_f32(a, b);
}

// CHECK-LABEL: @test_vcalt_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK:   ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
  return vcalt_f32(a, b);
}

// CHECK-LABEL: @test_vcaltq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK:   ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
  return vcaltq_f32(a, b);
}

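// The absolute comparisons above all funnel into the two intrinsics
// @llvm.arm.neon.vacge and @llvm.arm.neon.vacgt; vcale/vcalt are the same
// operations with the operands swapped, as the CHECK lines show.
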
// CHECK-LABEL: @test_vceq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
  return vceq_s8(a, b);
}

// CHECK-LABEL: @test_vceq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
  return vceq_s16(a, b);
}

// CHECK-LABEL: @test_vceq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
  return vceq_s32(a, b);
}

// CHECK-LABEL: @test_vceq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
  return vceq_f32(a, b);
}

// CHECK-LABEL: @test_vceq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
  return vceq_u8(a, b);
}

// CHECK-LABEL: @test_vceq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
  return vceq_u16(a, b);
}

// CHECK-LABEL: @test_vceq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
  return vceq_u32(a, b);
}

// CHECK-LABEL: @test_vceq_p8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
  return vceq_p8(a, b);
}

// CHECK-LABEL: @test_vceqq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
  return vceqq_s8(a, b);
}

// CHECK-LABEL: @test_vceqq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
  return vceqq_s16(a, b);
}

// CHECK-LABEL: @test_vceqq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
  return vceqq_s32(a, b);
}

// CHECK-LABEL: @test_vceqq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
  return vceqq_f32(a, b);
}

// CHECK-LABEL: @test_vceqq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
  return vceqq_u8(a, b);
}

// CHECK-LABEL: @test_vceqq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
  return vceqq_u16(a, b);
}

// CHECK-LABEL: @test_vceqq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
  return vceqq_u32(a, b);
}

// CHECK-LABEL: @test_vceqq_p8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
  return vceqq_p8(a, b);
}

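// Unlike the absolute comparisons, the ordinary compares (vceq, vcge, vcgt,
// vcle, vclt) need no target intrinsic: they lower to a generic icmp/fcmp
// followed by a sext, which yields the expected all-ones / all-zeros lane
// mask. Signedness is carried by the icmp predicate (sge/uge, sgt/ugt, ...).
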
// CHECK-LABEL: @test_vcge_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
  return vcge_s8(a, b);
}

// CHECK-LABEL: @test_vcge_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
  return vcge_s16(a, b);
}

// CHECK-LABEL: @test_vcge_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
  return vcge_s32(a, b);
}

// CHECK-LABEL: @test_vcge_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
  return vcge_f32(a, b);
}

// CHECK-LABEL: @test_vcge_u8(
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
  return vcge_u8(a, b);
}

// CHECK-LABEL: @test_vcge_u16(
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
  return vcge_u16(a, b);
}

// CHECK-LABEL: @test_vcge_u32(
// CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
  return vcge_u32(a, b);
}

// CHECK-LABEL: @test_vcgeq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
  return vcgeq_s8(a, b);
}

// CHECK-LABEL: @test_vcgeq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
  return vcgeq_s16(a, b);
}

// CHECK-LABEL: @test_vcgeq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
  return vcgeq_s32(a, b);
}

// CHECK-LABEL: @test_vcgeq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
  return vcgeq_f32(a, b);
}

// CHECK-LABEL: @test_vcgeq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgeq_u8(a, b);
}

// CHECK-LABEL: @test_vcgeq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgeq_u16(a, b);
}

// CHECK-LABEL: @test_vcgeq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgeq_u32(a, b);
}

// CHECK-LABEL: @test_vcgt_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
  return vcgt_s8(a, b);
}

// CHECK-LABEL: @test_vcgt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
  return vcgt_s16(a, b);
}

// CHECK-LABEL: @test_vcgt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
  return vcgt_s32(a, b);
}

// CHECK-LABEL: @test_vcgt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
  return vcgt_f32(a, b);
}

// CHECK-LABEL: @test_vcgt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
  return vcgt_u8(a, b);
}

// CHECK-LABEL: @test_vcgt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
  return vcgt_u16(a, b);
}

// CHECK-LABEL: @test_vcgt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
  return vcgt_u32(a, b);
}

// CHECK-LABEL: @test_vcgtq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
  return vcgtq_s8(a, b);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
  return vcgtq_s16(a, b);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
  return vcgtq_s32(a, b);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
  return vcgtq_f32(a, b);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgtq_u8(a, b);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgtq_u16(a, b);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgtq_u32(a, b);
}

// CHECK-LABEL: @test_vcle_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
  return vcle_s8(a, b);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
  return vcle_s16(a, b);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
  return vcle_s32(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
  return vcle_f32(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
  return vcle_u8(a, b);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
  return vcle_u16(a, b);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
  return vcle_u32(a, b);
}

// CHECK-LABEL: @test_vcleq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
  return vcleq_s8(a, b);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
  return vcleq_s16(a, b);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
  return vcleq_s32(a, b);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
  return vcleq_f32(a, b);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
  return vcleq_u8(a, b);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
  return vcleq_u16(a, b);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
  return vcleq_u32(a, b);
}

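// vcls (count leading sign bits) has no generic IR equivalent, so it maps
// to @llvm.arm.neon.vcls for each (signed) element type.
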
// CHECK-LABEL: @test_vcls_s8(
// CHECK:   [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_s8(int8x8_t a) {
  return vcls_s8(a);
}

// CHECK-LABEL: @test_vcls_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLS_V1_I]]
int16x4_t test_vcls_s16(int16x4_t a) {
  return vcls_s16(a);
}

// CHECK-LABEL: @test_vcls_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLS_V1_I]]
int32x2_t test_vcls_s32(int32x2_t a) {
  return vcls_s32(a);
}

// CHECK-LABEL: @test_vclsq_s8(
// CHECK:   [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_s8(int8x16_t a) {
  return vclsq_s8(a);
}

// CHECK-LABEL: @test_vclsq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VCLSQ_V1_I]]
int16x8_t test_vclsq_s16(int16x8_t a) {
  return vclsq_s16(a);
}

// CHECK-LABEL: @test_vclsq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VCLSQ_V1_I]]
int32x4_t test_vclsq_s32(int32x4_t a) {
  return vclsq_s32(a);
}

// CHECK-LABEL: @test_vclt_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
  return vclt_s8(a, b);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
  return vclt_s16(a, b);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
  return vclt_s32(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
  return vclt_f32(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
  return vclt_u8(a, b);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
  return vclt_u16(a, b);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
  return vclt_u32(a, b);
}

// CHECK-LABEL: @test_vcltq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
  return vcltq_s8(a, b);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
  return vcltq_s16(a, b);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
  return vcltq_s32(a, b);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
  return vcltq_f32(a, b);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
  return vcltq_u8(a, b);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
  return vcltq_u16(a, b);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
  return vcltq_u32(a, b);
}

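// vclz lowers to the generic @llvm.ctlz intrinsic. The trailing "i1 false"
// is the is-zero-poison flag: a zero input is well defined and returns the
// element width, matching the semantics of the VCLZ instruction.
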
// CHECK-LABEL: @test_vclz_s8(
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vclz_s8(int8x8_t a) {
  return vclz_s8(a);
}

// CHECK-LABEL: @test_vclz_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vclz_s16(int16x4_t a) {
  return vclz_s16(a);
}

// CHECK-LABEL: @test_vclz_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vclz_s32(int32x2_t a) {
  return vclz_s32(a);
}

// CHECK-LABEL: @test_vclz_u8(
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vclz_u8(uint8x8_t a) {
  return vclz_u8(a);
}

// CHECK-LABEL: @test_vclz_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
uint16x4_t test_vclz_u16(uint16x4_t a) {
  return vclz_u16(a);
}

// CHECK-LABEL: @test_vclz_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
uint32x2_t test_vclz_u32(uint32x2_t a) {
  return vclz_u32(a);
}

// CHECK-LABEL: @test_vclzq_s8(
// CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
int8x16_t test_vclzq_s8(int8x16_t a) {
  return vclzq_s8(a);
}

// CHECK-LABEL: @test_vclzq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VCLZQ_V1_I]]
int16x8_t test_vclzq_s16(int16x8_t a) {
  return vclzq_s16(a);
}

// CHECK-LABEL: @test_vclzq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VCLZQ_V1_I]]
int32x4_t test_vclzq_s32(int32x4_t a) {
  return vclzq_s32(a);
}

// CHECK-LABEL: @test_vclzq_u8(
// CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
  return vclzq_u8(a);
}

// CHECK-LABEL: @test_vclzq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VCLZQ_V1_I]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
  return vclzq_u16(a);
}

// CHECK-LABEL: @test_vclzq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VCLZQ_V1_I]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
  return vclzq_u32(a);
}

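// vcnt is a per-byte population count and lowers to the generic @llvm.ctpop
// intrinsic; NEON only provides it for 8-bit element types, hence the s8,
// u8, and p8 variants below.
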
// CHECK-LABEL: @test_vcnt_u8(
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
  return vcnt_u8(a);
}

// CHECK-LABEL: @test_vcnt_s8(
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
  return vcnt_s8(a);
}

// CHECK-LABEL: @test_vcnt_p8(
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
  return vcnt_p8(a);
}

// CHECK-LABEL: @test_vcntq_u8(
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
  return vcntq_u8(a);
}

// CHECK-LABEL: @test_vcntq_s8(
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
  return vcntq_s8(a);
}

// CHECK-LABEL: @test_vcntq_p8(
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
  return vcntq_p8(a);
}

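// vcombine concatenates two 64-bit vectors into one 128-bit vector, which
// is expressed in IR as a single shufflevector with an identity mask over
// both inputs.
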
// CHECK-LABEL: @test_vcombine_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
  return vcombine_s8(a, b);
}

// CHECK-LABEL: @test_vcombine_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
  return vcombine_s16(a, b);
}

// CHECK-LABEL: @test_vcombine_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
  return vcombine_s32(a, b);
}

// CHECK-LABEL: @test_vcombine_s64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
  return vcombine_s64(a, b);
}

// CHECK-LABEL: @test_vcombine_f16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
  return vcombine_f16(a, b);
}

// CHECK-LABEL: @test_vcombine_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
  return vcombine_f32(a, b);
}

// CHECK-LABEL: @test_vcombine_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
  return vcombine_u8(a, b);
}

// CHECK-LABEL: @test_vcombine_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
  return vcombine_u16(a, b);
}

// CHECK-LABEL: @test_vcombine_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
  return vcombine_u32(a, b);
}

// CHECK-LABEL: @test_vcombine_u64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
  return vcombine_u64(a, b);
}

// CHECK-LABEL: @test_vcombine_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
  return vcombine_p8(a, b);
}

// CHECK-LABEL: @test_vcombine_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
  return vcombine_p16(a, b);
}

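// vcreate reinterprets a scalar i64 as a 64-bit vector, i.e. a plain
// bitcast. Most of the tests below feed the result into another intrinsic
// (vclz, vcnt, vadd, vbsl) so the cast is observable in the checked IR.
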
// CHECK-LABEL: @test_vcreate_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
  return vclz_s8(vcreate_s8(a));
}

// CHECK-LABEL: @test_vcreate_imm
// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16>
// CHECK: ret <4 x i16> [[RES]]
int16x4_t test_vcreate_imm(void) {
  return vcreate_s16(0);
}

// CHECK-LABEL: @test_vcreate_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_s16(uint64_t a) {
  return vclz_s16(vcreate_s16(a));
}

// CHECK-LABEL: @test_vcreate_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_s32(uint64_t a) {
  return vclz_s32(vcreate_s32(a));
}

// CHECK-LABEL: @test_vcreate_f16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
  return vcreate_f16(a);
}

// CHECK-LABEL: @test_vcreate_f32(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
  return vcreate_f32(a);
}

// CHECK-LABEL: @test_vcreate_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_u8(uint64_t a) {
  return vclz_s8((int8x8_t)vcreate_u8(a));
}

// CHECK-LABEL: @test_vcreate_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_u16(uint64_t a) {
  return vclz_s16((int16x4_t)vcreate_u16(a));
}

// CHECK-LABEL: @test_vcreate_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_u32(uint64_t a) {
  return vclz_s32((int32x2_t)vcreate_u32(a));
}

// CHECK-LABEL: @test_vcreate_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vcreate_u64(uint64_t a) {
  uint64x1_t tmp = vcreate_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]])
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcreate_p8(uint64_t a) {
  return vcnt_p8(vcreate_p8(a));
}

// CHECK-LABEL: @test_vcreate_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP4]]
poly16x4_t test_vcreate_p16(uint64_t a) {
  poly16x4_t tmp = vcreate_p16(a);
  return vbsl_p16((uint16x4_t)tmp, tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vcreate_s64(uint64_t a) {
  int64x1_t tmp = vcreate_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vcvt_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a)
// CHECK:   [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK:   ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}

// CHECK-LABEL: @test_vcvt_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
  return vcvt_f32_s32(a);
}

// CHECK-LABEL: @test_vcvt_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
  return vcvt_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
  return vcvtq_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
  return vcvtq_f32_u32(a);
}

// CHECK-LABEL: @test_vcvt_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
// CHECK:   [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VCVT_F32_F161_I]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}

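// The fixed-point conversions (vcvt_n_*) take the number of fraction bits
// as an immediate, which shows up as the trailing i32 argument of the
// vcvtfxs2fp/vcvtfxu2fp (and vcvtfp2fxs/vcvtfp2fxu) intrinsics below. The
// plain vcvt_* conversions instead lower to generic sitofp/uitofp and
// fptosi/fptoui instructions.
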
2310 // CHECK-LABEL: @test_vcvt_n_f32_s32(
2311 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2312 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2313 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
2314 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_s32(int32x2_t a)2315 float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
2316   return vcvt_n_f32_s32(a, 1);
2317 }
2318 
2319 // CHECK-LABEL: @test_vcvt_n_f32_u32(
2320 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2321 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2322 // CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
2323 // CHECK:   ret <2 x float> [[VCVT_N1]]
test_vcvt_n_f32_u32(uint32x2_t a)2324 float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
2325   return vcvt_n_f32_u32(a, 1);
2326 }
2327 
2328 // CHECK-LABEL: @test_vcvtq_n_f32_s32(
2329 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2330 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2331 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
2332 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_s32(int32x4_t a)2333 float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
2334   return vcvtq_n_f32_s32(a, 3);
2335 }
2336 
2337 // CHECK-LABEL: @test_vcvtq_n_f32_u32(
2338 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2339 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2340 // CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
2341 // CHECK:   ret <4 x float> [[VCVT_N1]]
test_vcvtq_n_f32_u32(uint32x4_t a)2342 float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
2343   return vcvtq_n_f32_u32(a, 3);
2344 }
2345 
2346 // CHECK-LABEL: @test_vcvt_n_s32_f32(
2347 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
2348 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
2349 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
2350 // CHECK:   ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_s32_f32(float32x2_t a)2351 int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
2352   return vcvt_n_s32_f32(a, 1);
2353 }
2354 
2355 // CHECK-LABEL: @test_vcvtq_n_s32_f32(
2356 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
2357 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
2358 // CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
2359 // CHECK:   ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_s32_f32(float32x4_t a)2360 int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
2361   return vcvtq_n_s32_f32(a, 3);
2362 }
2363 
2364 // CHECK-LABEL: @test_vcvt_n_u32_f32(
2365 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
2366 // CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
2367 // CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
2368 // CHECK:   ret <2 x i32> [[VCVT_N1]]
test_vcvt_n_u32_f32(float32x2_t a)2369 uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
2370   return vcvt_n_u32_f32(a, 1);
2371 }
2372 
2373 // CHECK-LABEL: @test_vcvtq_n_u32_f32(
2374 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
2375 // CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
2376 // CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
2377 // CHECK:   ret <4 x i32> [[VCVT_N1]]
test_vcvtq_n_u32_f32(float32x4_t a)2378 uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
2379   return vcvtq_n_u32_f32(a, 3);
2380 }
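
// The _n_ forms are fixed-point conversions: vcvt_n_f32_s32(a, n) scales by
// 2^-n (the integer carries n fractional bits) and vcvt_n_s32_f32(a, n)
// scales by 2^n. A sketch of the int-to-float direction (illustrative ref_
// helper; the real intrinsic requires a constant n, and this sketch assumes
// 1 <= n < 31):
float32x2_t ref_vcvt_n_f32_s32(int32x2_t a, int n) {
  float32x2_t r = {0, 0};
  for (int i = 0; i != 2; ++i)
    r[i] = (float)a[i] / (float)(1 << n); // divide by 2^n
  return r;
}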

// CHECK-LABEL: @test_vcvt_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = fptosi <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[VCVT_I]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
  return vcvt_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_I:%.*]] = fptosi <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VCVT_I]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
  return vcvtq_s32_f32(a);
}

// CHECK-LABEL: @test_vcvt_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = fptoui <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[VCVT_I]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
  return vcvt_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_I:%.*]] = fptoui <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VCVT_I]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
  return vcvtq_u32_f32(a);
}
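
// The plain vcvt_s32/u32 conversions need no target intrinsic: they lower
// to fptosi/fptoui, which truncate toward zero exactly like a C cast.
// Illustrative ref_ helper (no CHECK lines):
int32x2_t ref_vcvt_s32_f32(float32x2_t a) {
  int32x2_t r = {0, 0};
  for (int i = 0; i != 2; ++i)
    r[i] = (int32_t)a[i]; // fptosi: truncates toward zero
  return r;
}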

// CHECK-LABEL: @test_vdup_lane_u8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
  return vdup_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i16> [[LANE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
  return vdup_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[LANE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
  return vdup_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_s8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
  return vdup_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i16> [[LANE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
  return vdup_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[LANE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
  return vdup_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_p8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
  return vdup_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i16> [[LANE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
  return vdup_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   ret <2 x float> [[LANE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
  return vdup_lane_f32(a, 1);
}
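
// vdup_lane_* broadcasts a single source lane: the shufflevector mask is
// just the lane index repeated once per destination lane. Lane-wise sketch
// with the lane fixed at 1 (the intrinsic needs a constant; the ref_ helper
// is illustrative only):
uint32x2_t ref_vdup_lane_u32(uint32x2_t a) {
  uint32x2_t r = {0, 0};
  for (int i = 0; i != 2; ++i)
    r[i] = a[1]; // every destination lane reads source lane 1
  return r;
}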

// CHECK-LABEL: @test_vdupq_lane_u8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
  return vdupq_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <8 x i16> [[LANE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
  return vdupq_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[LANE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
  return vdupq_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_s8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
  return vdupq_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <8 x i16> [[LANE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
  return vdupq_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[LANE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
  return vdupq_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_p8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
  return vdupq_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <8 x i16> [[LANE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
  return vdupq_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x float> [[LANE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
  return vdupq_lane_f32(a, 1);
}
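
// The q forms read a 64-bit vector but produce a 128-bit result, so the
// mask simply has twice as many entries, all equal to the selected lane.
// Illustrative ref_ helper with the lane fixed at 1:
float32x4_t ref_vdupq_lane_f32(float32x2_t a) {
  float32x4_t r = {0, 0, 0, 0};
  for (int i = 0; i != 4; ++i)
    r[i] = a[1]; // the splat of lane 1, widened to four lanes
  return r;
}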

// CHECK-LABEL: @test_vdup_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
  return vdup_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdup_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
  return vdup_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
  return vdupq_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
  return vdupq_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vdup_n_u8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vdup_n_u8(uint8_t a) {
  return vdup_n_u8(a);
}

// CHECK-LABEL: @test_vdup_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vdup_n_u16(uint16_t a) {
  return vdup_n_u16(a);
}

// CHECK-LABEL: @test_vdup_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vdup_n_u32(uint32_t a) {
  return vdup_n_u32(a);
}

// CHECK-LABEL: @test_vdup_n_s8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vdup_n_s8(int8_t a) {
  return vdup_n_s8(a);
}

// CHECK-LABEL: @test_vdup_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vdup_n_s16(int16_t a) {
  return vdup_n_s16(a);
}

// CHECK-LABEL: @test_vdup_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vdup_n_s32(int32_t a) {
  return vdup_n_s32(a);
}

// CHECK-LABEL: @test_vdup_n_p8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vdup_n_p8(poly8_t a) {
  return vdup_n_p8(a);
}

// CHECK-LABEL: @test_vdup_n_p16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vdup_n_p16(poly16_t a) {
  return vdup_n_p16(a);
}

// CHECK-LABEL: @test_vdup_n_f16(
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK:   [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK:   [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK:   [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK:   ret <4 x half> [[VECINIT3]]
float16x4_t test_vdup_n_f16(float16_t *a) {
  return vdup_n_f16(*a);
}

// CHECK-LABEL: @test_vdup_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK:   ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vdup_n_f32(float32_t a) {
  return vdup_n_f32(a);
}

// CHECK-LABEL: @test_vdupq_n_u8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vdupq_n_u8(uint8_t a) {
  return vdupq_n_u8(a);
}

// CHECK-LABEL: @test_vdupq_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vdupq_n_u16(uint16_t a) {
  return vdupq_n_u16(a);
}

// CHECK-LABEL: @test_vdupq_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vdupq_n_u32(uint32_t a) {
  return vdupq_n_u32(a);
}

// CHECK-LABEL: @test_vdupq_n_s8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vdupq_n_s8(int8_t a) {
  return vdupq_n_s8(a);
}

// CHECK-LABEL: @test_vdupq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vdupq_n_s16(int16_t a) {
  return vdupq_n_s16(a);
}

// CHECK-LABEL: @test_vdupq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vdupq_n_s32(int32_t a) {
  return vdupq_n_s32(a);
}

// CHECK-LABEL: @test_vdupq_n_p8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vdupq_n_p8(poly8_t a) {
  return vdupq_n_p8(a);
}

// CHECK-LABEL: @test_vdupq_n_p16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vdupq_n_p16(poly16_t a) {
  return vdupq_n_p16(a);
}

// CHECK-LABEL: @test_vdupq_n_f16(
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK:   [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK:   [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK:   [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK:   [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK:   [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK:   [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK:   [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK:   ret <8 x half> [[VECINIT7]]
float16x8_t test_vdupq_n_f16(float16_t *a) {
  return vdupq_n_f16(*a);
}

// CHECK-LABEL: @test_vdupq_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK:   ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vdupq_n_f32(float32_t a) {
  return vdupq_n_f32(a);
}
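
// vdup(q)_n_* splats a scalar through a chain of insertelement operations,
// one per lane, as checked above. A C vector initializer builds the same
// splat (illustrative ref_ helper, no CHECK lines):
uint16x4_t ref_vdup_n_u16(uint16_t v) {
  uint16x4_t r = {v, v, v, v}; // equivalent of the insertelement chain
  return r;
}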

// CHECK-LABEL: @test_vdup_n_s64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_s64(int64_t a) {
  int64x1_t tmp = vdup_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdup_n_u64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_u64(uint64_t a) {
  int64x1_t tmp = (int64x1_t)vdup_n_u64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_s64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vdupq_n_s64(int64_t a) {
  int64x2_t tmp = vdupq_n_s64(a);
  return vaddq_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_u64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vdupq_n_u64(uint64_t a) {
  uint64x2_t tmp = vdupq_n_u64(a);
  return vaddq_u64(tmp, tmp);
}

// CHECK-LABEL: @test_veor_s8(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[XOR_I]]
int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
  return veor_s8(a, b);
}

// CHECK-LABEL: @test_veor_s16(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[XOR_I]]
int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
  return veor_s16(a, b);
}

// CHECK-LABEL: @test_veor_s32(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[XOR_I]]
int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
  return veor_s32(a, b);
}

// CHECK-LABEL: @test_veor_s64(
// CHECK:   [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[XOR_I]]
int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
  return veor_s64(a, b);
}

// CHECK-LABEL: @test_veor_u8(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[XOR_I]]
uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
  return veor_u8(a, b);
}

// CHECK-LABEL: @test_veor_u16(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[XOR_I]]
uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
  return veor_u16(a, b);
}

// CHECK-LABEL: @test_veor_u32(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[XOR_I]]
uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
  return veor_u32(a, b);
}

// CHECK-LABEL: @test_veor_u64(
// CHECK:   [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[XOR_I]]
uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
  return veor_u64(a, b);
}

// CHECK-LABEL: @test_veorq_s8(
// CHECK:   [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[XOR_I]]
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
  return veorq_s8(a, b);
}

// CHECK-LABEL: @test_veorq_s16(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[XOR_I]]
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
  return veorq_s16(a, b);
}

// CHECK-LABEL: @test_veorq_s32(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[XOR_I]]
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
  return veorq_s32(a, b);
}

// CHECK-LABEL: @test_veorq_s64(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[XOR_I]]
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
  return veorq_s64(a, b);
}

// CHECK-LABEL: @test_veorq_u8(
// CHECK:   [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[XOR_I]]
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
  return veorq_u8(a, b);
}

// CHECK-LABEL: @test_veorq_u16(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[XOR_I]]
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
  return veorq_u16(a, b);
}

// CHECK-LABEL: @test_veorq_u32(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[XOR_I]]
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
  return veorq_u32(a, b);
}

// CHECK-LABEL: @test_veorq_u64(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[XOR_I]]
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
  return veorq_u64(a, b);
}
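
// veor maps straight to the IR xor instruction; the GNU vector operator on
// the NEON types produces identical IR (illustrative ref_ helper):
uint8x8_t ref_veor_u8(uint8x8_t a, uint8x8_t b) {
  return a ^ b; // xor <8 x i8>
}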

// CHECK-LABEL: @test_vext_s8(
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i8> [[VEXT]]
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
  return vext_s8(a, b, 7);
}

// CHECK-LABEL: @test_vext_u8(
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i8> [[VEXT]]
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
  return vext_u8(a, b, 7);
}

// CHECK-LABEL: @test_vext_p8(
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i8> [[VEXT]]
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
  return vext_p8(a, b, 7);
}

// CHECK-LABEL: @test_vext_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i16> [[VEXT]]
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
  return vext_s16(a, b, 3);
}

// CHECK-LABEL: @test_vext_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i16> [[VEXT]]
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
  return vext_u16(a, b, 3);
}

// CHECK-LABEL: @test_vext_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i16> [[VEXT]]
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
  return vext_p16(a, b, 3);
}

// CHECK-LABEL: @test_vext_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i32> [[VEXT]]
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
  return vext_s32(a, b, 1);
}

// CHECK-LABEL: @test_vext_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i32> [[VEXT]]
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
  return vext_u32(a, b, 1);
}

// CHECK-LABEL: @test_vext_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[VEXT]]
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
  return vext_s64(a, b, 0);
}

// CHECK-LABEL: @test_vext_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[VEXT]]
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
  return vext_u64(a, b, 0);
}

// CHECK-LABEL: @test_vext_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x float> [[VEXT]]
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
  return vext_f32(a, b, 1);
}

// CHECK-LABEL: @test_vextq_s8(
// CHECK:   [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK:   ret <16 x i8> [[VEXT]]
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
  return vextq_s8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_u8(
// CHECK:   [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK:   ret <16 x i8> [[VEXT]]
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
  return vextq_u8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_p8(
// CHECK:   [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK:   ret <16 x i8> [[VEXT]]
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
  return vextq_p8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i16> [[VEXT]]
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
  return vextq_s16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i16> [[VEXT]]
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
  return vextq_u16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i16> [[VEXT]]
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
  return vextq_p16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i32> [[VEXT]]
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
  return vextq_s32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i32> [[VEXT]]
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
  return vextq_u32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i64> [[VEXT]]
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
  return vextq_s64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i64> [[VEXT]]
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
  return vextq_u64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x float> [[VEXT]]
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
  return vextq_f32(a, b, 3);
}
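
// vext(q)_*(a, b, n) extracts a full-width vector starting at lane n of the
// concatenation a:b, which is why the masks above run <n, n+1, ...>. Sketch
// with n fixed at 1 (the intrinsic requires a constant; the ref_ helper is
// illustrative only):
uint32x2_t ref_vext_u32(uint32x2_t a, uint32x2_t b) {
  uint32x2_t r = {0, 0};
  for (int i = 0; i != 2; ++i)
    r[i] = (i + 1 < 2) ? a[i + 1] : b[i - 1]; // lanes 1 and 2 of a:b
  return r;
}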

// CHECK-LABEL: @test_vfma_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfma_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmaq_f32(a, b, c);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <2 x float> %b
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfms_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <4 x float> %b
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmsq_f32(a, b, c);
}
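
// vfms(a, b, c) computes a fused a - b*c: the IR negates b with fneg and
// reuses llvm.fma, so the multiply-subtract still rounds only once. Scalar
// sketch using the fma builtin (illustrative ref_ helper):
float32x2_t ref_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  float32x2_t r = {0, 0};
  for (int i = 0; i != 2; ++i)
    r[i] = __builtin_fmaf(-b[i], c[i], a[i]); // fused (-b)*c + a
  return r;
}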

// CHECK-LABEL: @test_vget_high_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_high_s8(int8x16_t a) {
  return vget_high_s8(a);
}

// CHECK-LABEL: @test_vget_high_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_high_s16(int16x8_t a) {
  return vget_high_s16(a);
}

// CHECK-LABEL: @test_vget_high_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_high_s32(int32x4_t a) {
  return vget_high_s32(a);
}

// CHECK-LABEL: @test_vget_high_s64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_high_s64(int64x2_t a) {
  return vget_high_s64(a);
}

// CHECK-LABEL: @test_vget_high_f16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_high_f16(float16x8_t a) {
  return vget_high_f16(a);
}

// CHECK-LABEL: @test_vget_high_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_high_f32(float32x4_t a) {
  return vget_high_f32(a);
}

// CHECK-LABEL: @test_vget_high_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_high_u8(uint8x16_t a) {
  return vget_high_u8(a);
}

// CHECK-LABEL: @test_vget_high_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_high_u16(uint16x8_t a) {
  return vget_high_u16(a);
}

// CHECK-LABEL: @test_vget_high_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_high_u32(uint32x4_t a) {
  return vget_high_u32(a);
}

// CHECK-LABEL: @test_vget_high_u64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_high_u64(uint64x2_t a) {
  return vget_high_u64(a);
}

// CHECK-LABEL: @test_vget_high_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_high_p8(poly8x16_t a) {
  return vget_high_p8(a);
}

// CHECK-LABEL: @test_vget_high_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_high_p16(poly16x8_t a) {
  return vget_high_p16(a);
}
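
// vget_high_* keeps the upper half of a q register: for N source lanes the
// mask selects lanes N/2..N-1. Equivalent lane copy (illustrative ref_
// helper):
uint16x4_t ref_vget_high_u16(uint16x8_t a) {
  uint16x4_t r = {0, 0, 0, 0};
  for (int i = 0; i != 4; ++i)
    r[i] = a[i + 4]; // the upper four of eight lanes
  return r;
}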

// CHECK-LABEL: @test_vget_lane_u8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_u16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_u32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK:   ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_s8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_s16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_s32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK:   ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_p8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_p16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_f32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK:   ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}
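
// vget(q)_lane_* is a single extractelement; subscripting the vector type
// in C yields the same IR (illustrative ref_ helper):
uint32_t ref_vget_lane_u32(uint32x2_t a) {
  return a[1]; // extractelement <2 x i32> %a, i32 1
}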
3416 
3417 // CHECK-LABEL: @test_vget_lane_f16(
3418 // CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
3419 // CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
3420 // CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
3421 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
3422 // CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
3423 // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
3424 // CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
3425 // CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
3426 // CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
3427 // CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
3428 // CHECK:   ret float [[CONV]]
test_vget_lane_f16(float16x4_t a)3429 float32_t test_vget_lane_f16(float16x4_t a) {
3430   return vget_lane_f16(a, 1);
3431 }
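
// As the checks above show, vget_lane_f16 is lowered by storing the
// <4 x half> vector and reloading it as <4 x i16>, extracting the lane as an
// i16, and reloading that as a half; the trailing fpext comes from the
// implicit conversion to the float32_t return type of the test.
//
// Minimal usage sketch (illustrative only, not part of the FileCheck-verified
// tests; the helper name example_lane_sum_f16 is hypothetical). Keeping the
// helpers below static inline means they emit no IR unless referenced, so the
// CHECK lines are undisturbed.
static inline float32_t example_lane_sum_f16(float16x4_t v) {
  // The lane index must be an integer constant expression in [0, 3].
  return vget_lane_f16(v, 0) + vget_lane_f16(v, 3);
}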

// CHECK-LABEL: @test_vgetq_lane_u8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGET_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_u16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGET_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_u32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK:   ret i32 [[VGET_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_s8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGET_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_s16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGET_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_s32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK:   ret i32 [[VGET_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_p8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGET_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_p16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGET_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_f32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK:   ret float [[VGET_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_f16(
// CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_s64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK:   ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vget_lane_u64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK:   ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vgetq_lane_s64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK:   ret i64 [[VGET_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: @test_vgetq_lane_u64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK:   ret i64 [[VGET_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}
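
// Every vget_lane/vgetq_lane intrinsic requires a constant lane index in
// [0, N-1] for an N-lane vector, which is why the 64-bit d-register tests
// above can only use lane 0 while their q-register counterparts reach lane 1.
// Sketch (illustrative only, not FileCheck-verified; example_sum_u64_lanes is
// a hypothetical helper).
static inline uint64_t example_sum_u64_lanes(uint64x2_t v) {
  // <2 x i64> has exactly two lanes: 0 and 1.
  return vgetq_lane_u64(v, 0) + vgetq_lane_u64(v, 1);
}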

// CHECK-LABEL: @test_vget_low_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_low_s8(int8x16_t a) {
  return vget_low_s8(a);
}

// CHECK-LABEL: @test_vget_low_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_low_s16(int16x8_t a) {
  return vget_low_s16(a);
}

// CHECK-LABEL: @test_vget_low_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_low_s32(int32x4_t a) {
  return vget_low_s32(a);
}

// CHECK-LABEL: @test_vget_low_s64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_low_s64(int64x2_t a) {
  return vget_low_s64(a);
}

// CHECK-LABEL: @test_vget_low_f16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_low_f16(float16x8_t a) {
  return vget_low_f16(a);
}

// CHECK-LABEL: @test_vget_low_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_low_f32(float32x4_t a) {
  return vget_low_f32(a);
}

// CHECK-LABEL: @test_vget_low_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_low_u8(uint8x16_t a) {
  return vget_low_u8(a);
}

// CHECK-LABEL: @test_vget_low_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_low_u16(uint16x8_t a) {
  return vget_low_u16(a);
}

// CHECK-LABEL: @test_vget_low_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_low_u32(uint32x4_t a) {
  return vget_low_u32(a);
}

// CHECK-LABEL: @test_vget_low_u64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_low_u64(uint64x2_t a) {
  return vget_low_u64(a);
}

// CHECK-LABEL: @test_vget_low_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_low_p8(poly8x16_t a) {
  return vget_low_p8(a);
}

// CHECK-LABEL: @test_vget_low_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_low_p16(poly16x8_t a) {
  return vget_low_p16(a);
}
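
// vget_low_* and vget_high_* are pure shufflevectors that split a 128-bit
// q-register value into its 64-bit halves, and vcombine_* is their inverse.
// Round-trip sketch (illustrative only, not FileCheck-verified;
// example_split_combine_s8 is a hypothetical helper).
static inline int8x16_t example_split_combine_s8(int8x16_t v) {
  int8x8_t lo = vget_low_s8(v);   // lanes 0..7
  int8x8_t hi = vget_high_s8(v);  // lanes 8..15
  return vcombine_s8(lo, hi);     // reassembles the original vector
}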

// CHECK-LABEL: @test_vhadd_s8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
  return vhadd_s8(a, b);
}

// CHECK-LABEL: @test_vhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
  return vhadd_s16(a, b);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
  return vhadd_s32(a, b);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vhadd_u16(a, b);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vhadd_u32(a, b);
}
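
// vhadd is the halving add: each lane computes (a + b) >> 1 in a widened
// intermediate, so the sum cannot overflow before the shift. Scalar model of
// one unsigned lane (illustrative only, not FileCheck-verified;
// example_scalar_vhadd_u8 is a hypothetical reference helper, not an
// intrinsic).
static inline uint8_t example_scalar_vhadd_u8(uint8_t a, uint8_t b) {
  // Widen so the sum cannot wrap, then shift right by one (no rounding).
  return (uint8_t)(((unsigned)a + (unsigned)b) >> 1);
}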

// CHECK-LABEL: @test_vhaddq_s8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
  return vhaddq_s8(a, b);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
  return vhaddq_s16(a, b);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
  return vhaddq_s32(a, b);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vhaddq_u8(a, b);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vhaddq_u16(a, b);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vhaddq_u32(a, b);
}

// CHECK-LABEL: @test_vhsub_s8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
  return vhsub_s8(a, b);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
  return vhsub_s16(a, b);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
  return vhsub_s32(a, b);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
  return vhsub_u8(a, b);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
  return vhsub_u16(a, b);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
  return vhsub_u32(a, b);
}

// CHECK-LABEL: @test_vhsubq_s8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
  return vhsubq_s8(a, b);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
  return vhsubq_s16(a, b);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
  return vhsubq_s32(a, b);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vhsubq_u8(a, b);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vhsubq_u16(a, b);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vhsubq_u32(a, b);
}
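
// vhsub is the subtracting counterpart: each lane computes (a - b) >> 1 in a
// widened intermediate and truncates back to the element width. Scalar model
// of one signed lane (illustrative only, not FileCheck-verified;
// example_scalar_vhsub_s8 is a hypothetical reference helper; it assumes >>
// on a negative int is an arithmetic shift, as clang and gcc implement it).
static inline int8_t example_scalar_vhsub_s8(int8_t a, int8_t b) {
  // Widen so the difference is exact, then arithmetic-shift right by one.
  return (int8_t)(((int)a - (int)b) >> 1);
}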

// CHECK-LABEL: @test_vld1q_u8(
// CHECK:   [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <16 x i8> [[VLD1]]
uint8x16_t test_vld1q_u8(uint8_t const * a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: @test_vld1q_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x i16> [[VLD1]]
uint16x8_t test_vld1q_u16(uint16_t const * a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: @test_vld1q_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <4 x i32> [[VLD1]]
uint32x4_t test_vld1q_u32(uint32_t const * a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: @test_vld1q_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i64> [[VLD1]]
uint64x2_t test_vld1q_u64(uint64_t const * a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: @test_vld1q_s8(
// CHECK:   [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <16 x i8> [[VLD1]]
int8x16_t test_vld1q_s8(int8_t const * a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: @test_vld1q_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x i16> [[VLD1]]
int16x8_t test_vld1q_s16(int16_t const * a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: @test_vld1q_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <4 x i32> [[VLD1]]
int32x4_t test_vld1q_s32(int32_t const * a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: @test_vld1q_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i64> [[VLD1]]
int64x2_t test_vld1q_s64(int64_t const * a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: @test_vld1q_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x half> [[VLD1]]
float16x8_t test_vld1q_f16(float16_t const * a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: @test_vld1q_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <4 x float> [[VLD1]]
float32x4_t test_vld1q_f32(float32_t const * a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: @test_vld1q_p8(
// CHECK:   [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <16 x i8> [[VLD1]]
poly8x16_t test_vld1q_p8(poly8_t const * a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: @test_vld1q_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x i16> [[VLD1]]
poly16x8_t test_vld1q_p16(poly16_t const * a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: @test_vld1_u8(
// CHECK:   [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <8 x i8> [[VLD1]]
uint8x8_t test_vld1_u8(uint8_t const * a) {
  return vld1_u8(a);
}

// CHECK-LABEL: @test_vld1_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x i16> [[VLD1]]
uint16x4_t test_vld1_u16(uint16_t const * a) {
  return vld1_u16(a);
}

// CHECK-LABEL: @test_vld1_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i32> [[VLD1]]
uint32x2_t test_vld1_u32(uint32_t const * a) {
  return vld1_u32(a);
}

// CHECK-LABEL: @test_vld1_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <1 x i64> [[VLD1]]
uint64x1_t test_vld1_u64(uint64_t const * a) {
  return vld1_u64(a);
}

// CHECK-LABEL: @test_vld1_s8(
// CHECK:   [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <8 x i8> [[VLD1]]
int8x8_t test_vld1_s8(int8_t const * a) {
  return vld1_s8(a);
}

// CHECK-LABEL: @test_vld1_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x i16> [[VLD1]]
int16x4_t test_vld1_s16(int16_t const * a) {
  return vld1_s16(a);
}

// CHECK-LABEL: @test_vld1_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i32> [[VLD1]]
int32x2_t test_vld1_s32(int32_t const * a) {
  return vld1_s32(a);
}

// CHECK-LABEL: @test_vld1_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <1 x i64> [[VLD1]]
int64x1_t test_vld1_s64(int64_t const * a) {
  return vld1_s64(a);
}

// CHECK-LABEL: @test_vld1_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x half> [[VLD1]]
float16x4_t test_vld1_f16(float16_t const * a) {
  return vld1_f16(a);
}

// CHECK-LABEL: @test_vld1_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x float> [[VLD1]]
float32x2_t test_vld1_f32(float32_t const * a) {
  return vld1_f32(a);
}

// CHECK-LABEL: @test_vld1_p8(
// CHECK:   [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <8 x i8> [[VLD1]]
poly8x8_t test_vld1_p8(poly8_t const * a) {
  return vld1_p8(a);
}

// CHECK-LABEL: @test_vld1_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x i16> [[VLD1]]
poly16x4_t test_vld1_p16(poly16_t const * a) {
  return vld1_p16(a);
}
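
// vld1/vld1q load N consecutive elements into a single d- or q-register; the
// alignment operand on the llvm.arm.neon.vld1 calls above (i32 1/2/4) is just
// the element type's natural alignment. Usage sketch (illustrative only, not
// FileCheck-verified; example_load_row is a hypothetical helper).
static inline uint32x4_t example_load_row(const uint32_t buf[4]) {
  // Loads buf[0..3] into one 128-bit vector.
  return vld1q_u32(buf);
}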

// CHECK-LABEL: @test_vld1q_dup_u8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK:   ret <16 x i8> [[LANE]]
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
  return vld1q_dup_u8(a);
}

// CHECK-LABEL: @test_vld1q_dup_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i16> [[LANE]]
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
  return vld1q_dup_u16(a);
}

// CHECK-LABEL: @test_vld1q_dup_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i32> [[LANE]]
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
  return vld1q_dup_u32(a);
}

// CHECK-LABEL: @test_vld1q_dup_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
  return vld1q_dup_u64(a);
}

// CHECK-LABEL: @test_vld1q_dup_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK:   ret <16 x i8> [[LANE]]
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
  return vld1q_dup_s8(a);
}

// CHECK-LABEL: @test_vld1q_dup_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i16> [[LANE]]
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
  return vld1q_dup_s16(a);
}

// CHECK-LABEL: @test_vld1q_dup_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i32> [[LANE]]
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
  return vld1q_dup_s32(a);
}

// CHECK-LABEL: @test_vld1q_dup_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
  return vld1q_dup_s64(a);
}

// CHECK-LABEL: @test_vld1q_dup_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> undef, half [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x half> [[LANE]]
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
  return vld1q_dup_f16(a);
}

// CHECK-LABEL: @test_vld1q_dup_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x float> [[LANE]]
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
  return vld1q_dup_f32(a);
}

// CHECK-LABEL: @test_vld1q_dup_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK:   ret <16 x i8> [[LANE]]
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
  return vld1q_dup_p8(a);
}

// CHECK-LABEL: @test_vld1q_dup_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i16> [[LANE]]
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
  return vld1q_dup_p16(a);
}

// CHECK-LABEL: @test_vld1_dup_u8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i8> [[LANE]]
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
  return vld1_dup_u8(a);
}

// CHECK-LABEL: @test_vld1_dup_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i16> [[LANE]]
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
  return vld1_dup_u16(a);
}

// CHECK-LABEL: @test_vld1_dup_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i32> [[LANE]]
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
  return vld1_dup_u32(a);
}

// CHECK-LABEL: @test_vld1_dup_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
  return vld1_dup_u64(a);
}

// CHECK-LABEL: @test_vld1_dup_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i8> [[LANE]]
int8x8_t test_vld1_dup_s8(int8_t const * a) {
  return vld1_dup_s8(a);
}

// CHECK-LABEL: @test_vld1_dup_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i16> [[LANE]]
int16x4_t test_vld1_dup_s16(int16_t const * a) {
  return vld1_dup_s16(a);
}

// CHECK-LABEL: @test_vld1_dup_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i32> [[LANE]]
int32x2_t test_vld1_dup_s32(int32_t const * a) {
  return vld1_dup_s32(a);
}

// CHECK-LABEL: @test_vld1_dup_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
int64x1_t test_vld1_dup_s64(int64_t const * a) {
  return vld1_dup_s64(a);
}

// CHECK-LABEL: @test_vld1_dup_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> undef, half [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x half> [[LANE]]
float16x4_t test_vld1_dup_f16(float16_t const * a) {
  return vld1_dup_f16(a);
}

// CHECK-LABEL: @test_vld1_dup_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x float> [[LANE]]
float32x2_t test_vld1_dup_f32(float32_t const * a) {
  return vld1_dup_f32(a);
}

// CHECK-LABEL: @test_vld1_dup_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i8> [[LANE]]
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
  return vld1_dup_p8(a);
}

// CHECK-LABEL: @test_vld1_dup_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i16> [[LANE]]
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
  return vld1_dup_p16(a);
}
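
// vld1_dup_*/vld1q_dup_* load one element and replicate it across every lane,
// which is exactly the load + insertelement + zero-mask shufflevector pattern
// checked above; it is the from-memory analogue of vdup_n_*. Sketch
// (illustrative only, not FileCheck-verified; example_splat_from_mem is a
// hypothetical helper).
static inline float32x2_t example_splat_from_mem(const float32_t *p) {
  // Equivalent in effect to vdup_n_f32(*p).
  return vld1_dup_f32(p);
}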

// CHECK-LABEL: @test_vld1q_lane_u8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK:   ret <16 x i8> [[VLD1_LANE]]
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
  return vld1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK:   ret <8 x i16> [[VLD1_LANE]]
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
  return vld1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK:   ret <4 x i32> [[VLD1_LANE]]
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
  return vld1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[VLD1Q_LANE]]
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
  return vld1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK:   ret <16 x i8> [[VLD1_LANE]]
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
  return vld1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK:   ret <8 x i16> [[VLD1_LANE]]
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
  return vld1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK:   ret <4 x i32> [[VLD1_LANE]]
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
  return vld1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[VLD1Q_LANE]]
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
  return vld1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
// CHECK:   ret <8 x half> [[VLD1_LANE]]
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
  return vld1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
// CHECK:   ret <4 x float> [[VLD1_LANE]]
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
  return vld1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK:   ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
  return vld1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK:   ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
  return vld1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_u8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK:   ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
  return vld1_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK:   ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
  return vld1_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK:   ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK:   ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK:   ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK:   ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK:   ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
  return vld1_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK:   ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
  return vld1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
// CHECK:   ret <4 x half> [[VLD1_LANE]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
  return vld1_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
// CHECK:   ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
  return vld1_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK:   ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
  return vld1_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK:   ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
  return vld1_lane_p16(a, b, 3);
}
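
// vld1_lane_*/vld1q_lane_* load one element from memory into a chosen lane of
// an existing vector, leaving the other lanes intact (an insertelement in the
// IR above, except for 64-bit lanes, which go through vld1 + shufflevector).
// The lane index must again be a constant expression. Sketch (illustrative
// only, not FileCheck-verified; example_replace_lane0 is a hypothetical
// helper).
static inline int16x4_t example_replace_lane0(const int16_t *p, int16x4_t v) {
  // Returns v with lane 0 replaced by *p; lanes 1..3 are preserved.
  return vld1_lane_s16(p, v, 0);
}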
4541 
// CHECK-LABEL: @test_vld2q_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: @test_vld2q_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: @test_vld2q_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: @test_vld2q_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
int8x16x2_t test_vld2q_s8(int8_t const * a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: @test_vld2q_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
int16x8x2_t test_vld2q_s16(int16_t const * a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: @test_vld2q_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
int32x4x2_t test_vld2q_s32(int32_t const * a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: @test_vld2q_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half>
float16x8x2_t test_vld2q_f16(float16_t const * a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: @test_vld2q_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float>
float32x4x2_t test_vld2q_f32(float32_t const * a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: @test_vld2q_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: @test_vld2q_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
  return vld2q_p16(a);
}

// CHECK-LABEL: @test_vld2_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
  return vld2_u8(a);
}

// CHECK-LABEL: @test_vld2_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
  return vld2_u16(a);
}

// CHECK-LABEL: @test_vld2_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
  return vld2_u32(a);
}

// CHECK-LABEL: @test_vld2_u64(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
  return vld2_u64(a);
}

// CHECK-LABEL: @test_vld2_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
int8x8x2_t test_vld2_s8(int8_t const * a) {
  return vld2_s8(a);
}

// CHECK-LABEL: @test_vld2_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
int16x4x2_t test_vld2_s16(int16_t const * a) {
  return vld2_s16(a);
}

// CHECK-LABEL: @test_vld2_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
int32x2x2_t test_vld2_s32(int32_t const * a) {
  return vld2_s32(a);
}

// CHECK-LABEL: @test_vld2_s64(
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
int64x1x2_t test_vld2_s64(int64_t const * a) {
  return vld2_s64(a);
}

// CHECK-LABEL: @test_vld2_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x half>, <4 x half>
float16x4x2_t test_vld2_f16(float16_t const * a) {
  return vld2_f16(a);
}

// CHECK-LABEL: @test_vld2_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <2 x float>, <2 x float>
float32x2x2_t test_vld2_f32(float32_t const * a) {
  return vld2_f32(a);
}

// CHECK-LABEL: @test_vld2_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
  return vld2_p8(a);
}

// CHECK-LABEL: @test_vld2_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
  return vld2_p16(a);
}

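// vld2q_lane_<type> loads a single two-element structure from memory into
// the given lane of each vector of an existing <type>x2_t value. Under this
// target's ABI the struct argument arrives coerced as an i64 array, so the
// CHECK lines first match the store/memcpy sequence that reconstitutes it
// before the intrinsic call.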
// CHECK-LABEL: @test_vld2q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
  return vld2q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
  return vld2q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
  return vld2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
  return vld2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>
float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
  return vld2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>
float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
  return vld2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
  return vld2q_lane_p16(a, b, 7);
}

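// The d-register lane loads follow the same shape as the q-register ones
// above, but with 64-bit vectors: the coerced argument is [2 x i64] and the
// local copy is a 16-byte memcpy.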
// CHECK-LABEL: @test_vld2_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
  return vld2_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
  return vld2_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
  return vld2_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
  return vld2_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
  return vld2_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
  return vld2_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half>
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
  return vld2_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float>
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
  return vld2_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
  return vld2_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
  return vld2_lane_p16(a, b, 3);
}

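// vld3/vld3q extend the de-interleaving load to three-element structures;
// the aggregate returned by the intrinsic now carries three vectors, as the
// three-member struct types in the CHECK lines show.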
// CHECK-LABEL: @test_vld3q_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
  return vld3q_u8(a);
}

// CHECK-LABEL: @test_vld3q_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
  return vld3q_u16(a);
}

// CHECK-LABEL: @test_vld3q_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
  return vld3q_u32(a);
}

// CHECK-LABEL: @test_vld3q_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
int8x16x3_t test_vld3q_s8(int8_t const * a) {
  return vld3q_s8(a);
}

// CHECK-LABEL: @test_vld3q_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
int16x8x3_t test_vld3q_s16(int16_t const * a) {
  return vld3q_s16(a);
}

// CHECK-LABEL: @test_vld3q_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
int32x4x3_t test_vld3q_s32(int32_t const * a) {
  return vld3q_s32(a);
}

// CHECK-LABEL: @test_vld3q_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
float16x8x3_t test_vld3q_f16(float16_t const * a) {
  return vld3q_f16(a);
}

// CHECK-LABEL: @test_vld3q_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
float32x4x3_t test_vld3q_f32(float32_t const * a) {
  return vld3q_f32(a);
}

// CHECK-LABEL: @test_vld3q_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
  return vld3q_p8(a);
}

// CHECK-LABEL: @test_vld3q_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
  return vld3q_p16(a);
}

// CHECK-LABEL: @test_vld3_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
  return vld3_u8(a);
}

// CHECK-LABEL: @test_vld3_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
  return vld3_u16(a);
}

// CHECK-LABEL: @test_vld3_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
  return vld3_u32(a);
}

// CHECK-LABEL: @test_vld3_u64(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
  return vld3_u64(a);
}

// CHECK-LABEL: @test_vld3_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
int8x8x3_t test_vld3_s8(int8_t const * a) {
  return vld3_s8(a);
}

// CHECK-LABEL: @test_vld3_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
int16x4x3_t test_vld3_s16(int16_t const * a) {
  return vld3_s16(a);
}

// CHECK-LABEL: @test_vld3_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
int32x2x3_t test_vld3_s32(int32_t const * a) {
  return vld3_s32(a);
}

// CHECK-LABEL: @test_vld3_s64(
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
int64x1x3_t test_vld3_s64(int64_t const * a) {
  return vld3_s64(a);
}

// CHECK-LABEL: @test_vld3_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
float16x4x3_t test_vld3_f16(float16_t const * a) {
  return vld3_f16(a);
}

// CHECK-LABEL: @test_vld3_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
float32x2x3_t test_vld3_f32(float32_t const * a) {
  return vld3_f32(a);
}

// CHECK-LABEL: @test_vld3_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
  return vld3_p8(a);
}

// CHECK-LABEL: @test_vld3_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
  return vld3_p16(a);
}

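// vld3q_lane_<type> loads one three-element structure into the given lane of
// each of three q-register vectors; the coerced struct argument grows to
// [6 x i64] with a 48-byte memcpy.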
// CHECK-LABEL: @test_vld3q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld3q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
  return vld3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld3q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
  return vld3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld3q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
  return vld3q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld3q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
  return vld3q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld3q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
  return vld3q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld3q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
  return vld3q_lane_p16(a, b, 7);
}

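// The 64-bit (D-register) vld3_lane variants below mirror the Q-register
// tests above at half width: the argument coerces to [3 x i64], the memcpy
// shrinks to 24 bytes, and the valid lane range halves accordingly
// (e.g. 0-7 for <8 x i8>, 0-1 for <2 x i32>).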
// CHECK-LABEL: @test_vld3_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
  return vld3_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
  return vld3_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
  return vld3_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
  return vld3_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
  return vld3_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
  return vld3_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
  return vld3_lane_p16(a, b, 3);
}

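// vld4q loads four-element interleaved structures from memory and
// de-interleaves them into four Q registers. With no struct argument to
// coerce, each test below reduces to an alloca for the returned aggregate,
// pointer bitcasts, and the vld4 intrinsic call.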
// CHECK-LABEL: @test_vld4q_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: @test_vld4q_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  return vld4q_u16(a);
}

// CHECK-LABEL: @test_vld4q_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: @test_vld4q_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: @test_vld4q_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: @test_vld4q_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
int32x4x4_t test_vld4q_s32(int32_t const * a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: @test_vld4q_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
float16x8x4_t test_vld4q_f16(float16_t const * a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: @test_vld4q_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
float32x4x4_t test_vld4q_f32(float32_t const * a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: @test_vld4q_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: @test_vld4q_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
  return vld4q_p16(a);
}

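// The D-register vld4 variants follow the same pattern at 8-byte alignment.
// Note the 64-bit element forms (vld4_u64/vld4_s64) appear only as whole
// structure loads: AArch32 NEON single-lane structure loads do not support
// 64-bit elements, so there are no vld4_lane_u64/s64 intrinsics to test.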
// CHECK-LABEL: @test_vld4_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
  return vld4_u8(a);
}

// CHECK-LABEL: @test_vld4_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
  return vld4_u16(a);
}

// CHECK-LABEL: @test_vld4_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
  return vld4_u32(a);
}

// CHECK-LABEL: @test_vld4_u64(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
  return vld4_u64(a);
}

// CHECK-LABEL: @test_vld4_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
int8x8x4_t test_vld4_s8(int8_t const * a) {
  return vld4_s8(a);
}

// CHECK-LABEL: @test_vld4_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
int16x4x4_t test_vld4_s16(int16_t const * a) {
  return vld4_s16(a);
}

// CHECK-LABEL: @test_vld4_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
int32x2x4_t test_vld4_s32(int32_t const * a) {
  return vld4_s32(a);
}

// CHECK-LABEL: @test_vld4_s64(
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
int64x1x4_t test_vld4_s64(int64_t const * a) {
  return vld4_s64(a);
}

// CHECK-LABEL: @test_vld4_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
float16x4x4_t test_vld4_f16(float16_t const * a) {
  return vld4_f16(a);
}

// CHECK-LABEL: @test_vld4_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
float32x2x4_t test_vld4_f32(float32_t const * a) {
  return vld4_f32(a);
}

// CHECK-LABEL: @test_vld4_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
  return vld4_p8(a);
}

// CHECK-LABEL: @test_vld4_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
  return vld4_p16(a);
}

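// vld4q_lane extends the vld3q_lane pattern above to four vectors: the
// argument coerces to [8 x i64], the memcpy grows to 64 bytes, and four
// load/bitcast pairs feed the lane-load call.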
// CHECK-LABEL: @test_vld4q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
  return vld4q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld4q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
  return vld4q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld4q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
  return vld4q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld4q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
6224 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6225 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6226 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6227 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
6228 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
test_vld4q_lane_s32(int32_t const * a,int32x4x4_t b)6229 int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
6230   return vld4q_lane_s32(a, b, 3);
6231 }
6232 
6233 // CHECK-LABEL: @test_vld4q_lane_f16(
6234 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
6235 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
6236 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
6237 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
6238 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
6239 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6240 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
6241 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
6242 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6243 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
6244 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
6245 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6246 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
6247 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
6248 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
6249 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6250 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
6251 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
6252 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
6253 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6254 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
6255 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
6256 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
6257 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6258 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
6259 // CHECK:   [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
6260 // CHECK:   [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
6261 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
6262 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
6263 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
6264 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x half>
6265 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
test_vld4q_lane_f16(float16_t const * a,float16x8x4_t b)6266 float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
6267   return vld4q_lane_f16(a, b, 7);
6268 }
6269 
6270 // CHECK-LABEL: @test_vld4q_lane_f32(
6271 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
6272 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
6273 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
6274 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
6275 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
6276 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6277 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
6278 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
6279 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6280 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
6281 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
6282 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6283 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
6284 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
6285 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
6286 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6287 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
6288 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
6289 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
6290 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6291 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
6292 // CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
6293 // CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
6294 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6295 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
6296 // CHECK:   [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
6297 // CHECK:   [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
6298 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
6299 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
6300 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
6301 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
6302 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
test_vld4q_lane_f32(float32_t const * a,float32x4x4_t b)6303 float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
6304   return vld4q_lane_f32(a, b, 3);
6305 }
6306 
6307 // CHECK-LABEL: @test_vld4q_lane_p16(
6308 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
6309 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
6310 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
6311 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
6312 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6313 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6314 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
6315 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
6316 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6317 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
6318 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
6319 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6320 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6321 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6322 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6323 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6324 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6325 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6326 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6327 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6328 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6329 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6330 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6331 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6332 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6333 // CHECK:   [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6334 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6335 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6336 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6337 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6338 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6339 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
test_vld4q_lane_p16(poly16_t const * a,poly16x8x4_t b)6340 poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
6341   return vld4q_lane_p16(a, b, 7);
6342 }
6343 
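// The remaining vld4_lane tests exercise the 64-bit (d-register) variants.
// As the CHECK lines above show for the q-register forms, the four-vector
// aggregate argument arrives coerced as an i64 array ([4 x i64] for the
// d-register structs below), is spilled to an alloca and memcpy'd into the
// __s1 temporary, and each element vector is bitcast through an i8 vector
// before being passed to the @llvm.arm.neon.vld4lane intrinsic along with
// the lane index.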
// CHECK-LABEL: @test_vld4_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
  return vld4_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}

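// vmax/vmaxq compute the lane-wise maximum. Signed and floating-point
// variants lower to @llvm.arm.neon.vmaxs and unsigned variants to
// @llvm.arm.neon.vmaxu; the 16- and 32-bit element forms are additionally
// bitcast through i8 vectors, as checked below. Illustrative use only
// (x, y, m are placeholder names, not part of the checked output):
//   int8x8_t m = vmax_s8(x, y);   // m[i] == max(x[i], y[i])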
// CHECK-LABEL: @test_vmax_s8(
// CHECK:   [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_V_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VMAX_V2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VMAX_V2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK:   [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_V_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VMAX_V2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VMAX_V2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VMAX_V2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_s8(
// CHECK:   [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAXQ_V_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMAXQ_V2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMAXQ_V2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK:   [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAXQ_V_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMAXQ_V2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMAXQ_V2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VMAXQ_V2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

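// vmin/vminq mirror the vmax tests above with the lane-wise minimum,
// lowering to @llvm.arm.neon.vmins (signed and float) and
// @llvm.arm.neon.vminu (unsigned).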
// CHECK-LABEL: @test_vmin_s8(
// CHECK:   [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_V_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: @test_vmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VMIN_V2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: @test_vmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VMIN_V2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: @test_vmin_u8(
// CHECK:   [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMIN_V_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: @test_vmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VMIN_V2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VMIN_V2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VMIN_V2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK:   [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMINQ_V_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMINQ_V2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMINQ_V2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK:   [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMINQ_V_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMINQ_V2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMINQ_V2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VMINQ_V2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

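// vmla (multiply-accumulate) does not map to a dedicated NEON intrinsic in
// the IR: as the CHECK lines below verify, vmla(a, b, c) is emitted as a
// plain mul/add pair (fmul/fadd for float), i.e. a + b * c per lane.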
// CHECK-LABEL: @test_vmla_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmla_s8(a, b, c);
}

// CHECK-LABEL: @test_vmla_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_s16(a, b, c);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_s32(a, b, c);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_f32(a, b, c);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmla_u8(a, b, c);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_u16(a, b, c);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlaq_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlaq_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlaq_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlaq_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlaq_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlaq_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlaq_u32(a, b, c);
}

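// vmlal_* are widening multiply-accumulates: the narrow b and c operands are
// multiplied via @llvm.arm.neon.vmulls/@llvm.arm.neon.vmullu into a result of
// twice the element width, which is then added to the wide accumulator a.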
// CHECK-LABEL: @test_vmlal_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

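// The _lane variants first splat the selected element of c with a
// shufflevector; the lane index is the trailing immediate argument of the
// intrinsic call (3 for the i16 tests, 1 for the i32 tests below).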
// CHECK-LABEL: @test_vmlal_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlal_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK:   [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlal_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[ADD]]
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlal_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK:   [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[ADD]]
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_lane_u32(a, b, c, 1);
}

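// The _n variants broadcast the scalar c across a vector with a chain of
// insertelement instructions before the same widening multiply.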
// CHECK-LABEL: @test_vmlal_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlal_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlal_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_n_u32(a, b, c);
}

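// Non-widening lane forms: splat one lane of c with a shufflevector, then the
// usual mul/fmul + add/fadd sequence on same-width vectors.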
// CHECK-LABEL: @test_vmla_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[ADD]]
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmla_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[ADD]]
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmla_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[ADD]]
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmla_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[ADD]]
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmla_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[ADD]]
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlaq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlaq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[ADD]]
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlaq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[ADD]]
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlaq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlaq_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[ADD]]
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlaq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlaq_lane_f32(a, b, c, 1);
}

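// Non-widening scalar forms: broadcast c via insertelement, then
// mul/fmul + add/fadd.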
// CHECK-LABEL: @test_vmla_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmla_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmla_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmla_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmla_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlaq_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlaq_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlaq_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlaq_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}

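// vmls/vmlsq mirror vmla/vmlaq but accumulate by subtraction, computing
// a - (b * c) lanewise: mul/fmul followed by sub/fsub.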
// CHECK-LABEL: @test_vmls_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmls_s8(a, b, c);
}

// CHECK-LABEL: @test_vmls_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_s16(a, b, c);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_s32(a, b, c);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_f32(a, b, c);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmls_u8(a, b, c);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_u16(a, b, c);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlsq_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlsq_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlsq_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlsq_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsq_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsq_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsq_u32(a, b, c);
}

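// vmlsl_* are the widening multiply-subtract counterparts of vmlal_*: the
// same vmulls/vmullu call, followed by a sub from the wide accumulator.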
// CHECK-LABEL: @test_vmlsl_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

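// Lane-selecting forms of the widening multiply-subtract: splat a lane of c,
// widening multiply, then sub.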
// CHECK-LABEL: @test_vmlsl_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[SUB]]
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsl_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK:   [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[SUB]]
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsl_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsl_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK:   [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[SUB]]
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_lane_u32(a, b, c, 1);
}

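// Scalar-broadcast forms of the widening multiply-subtract.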
// CHECK-LABEL: @test_vmlsl_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlsl_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlsl_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlsl_n_u32(a, b, c);
}

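// Non-widening lane forms of multiply-subtract: splat a lane of c, then
// mul/fmul + sub/fsub.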
// CHECK-LABEL: @test_vmls_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[SUB]]
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmls_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[SUB]]
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmls_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[SUB]]
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmls_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[SUB]]
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmls_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[SUB]]
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlsq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[SUB]]
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlsq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[SUB]]
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlsq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsq_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlsq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlsq_lane_f32(a, b, c, 1);
}

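// Non-widening scalar forms of multiply-subtract: broadcast c, then
// mul/fmul + sub/fsub.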
7829 // CHECK-LABEL: @test_vmls_n_s16(
7830 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7831 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7832 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7833 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7834 // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7835 // CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7836 // CHECK:   ret <4 x i16> [[SUB_I]]
test_vmls_n_s16(int16x4_t a,int16x4_t b,int16_t c)7837 int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
7838   return vmls_n_s16(a, b, c);
7839 }
7840 
7841 // CHECK-LABEL: @test_vmls_n_s32(
7842 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7843 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7844 // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7845 // CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7846 // CHECK:   ret <2 x i32> [[SUB_I]]
test_vmls_n_s32(int32x2_t a,int32x2_t b,int32_t c)7847 int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
7848   return vmls_n_s32(a, b, c);
7849 }
7850 
7851 // CHECK-LABEL: @test_vmls_n_u16(
7852 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7853 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7854 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7855 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7856 // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7857 // CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7858 // CHECK:   ret <4 x i16> [[SUB_I]]
test_vmls_n_u16(uint16x4_t a,uint16x4_t b,uint16_t c)7859 uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
7860   return vmls_n_u16(a, b, c);
7861 }
7862 
7863 // CHECK-LABEL: @test_vmls_n_u32(
7864 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7865 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7866 // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7867 // CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
7868 // CHECK:   ret <2 x i32> [[SUB_I]]
test_vmls_n_u32(uint32x2_t a,uint32x2_t b,uint32_t c)7869 uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
7870   return vmls_n_u32(a, b, c);
7871 }
7872 
7873 // CHECK-LABEL: @test_vmls_n_f32(
7874 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
7875 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
7876 // CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
7877 // CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
7878 // CHECK:   ret <2 x float> [[SUB_I]]
test_vmls_n_f32(float32x2_t a,float32x2_t b,float32_t c)7879 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
7880   return vmls_n_f32(a, b, c);
7881 }
7882 
7883 // CHECK-LABEL: @test_vmlsq_n_s16(
7884 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7885 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7886 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7887 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7888 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7889 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7890 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7891 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7892 // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7893 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7894 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vmlsq_n_s16(int16x8_t a,int16x8_t b,int16_t c)7895 int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
7896   return vmlsq_n_s16(a, b, c);
7897 }
7898 
7899 // CHECK-LABEL: @test_vmlsq_n_s32(
7900 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7901 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7902 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7903 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7904 // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7905 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7906 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vmlsq_n_s32(int32x4_t a,int32x4_t b,int32_t c)7907 int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
7908   return vmlsq_n_s32(a, b, c);
7909 }
7910 
7911 // CHECK-LABEL: @test_vmlsq_n_u16(
7912 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
7913 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
7914 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
7915 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
7916 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
7917 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
7918 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
7919 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
7920 // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
7921 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
7922 // CHECK:   ret <8 x i16> [[SUB_I]]
test_vmlsq_n_u16(uint16x8_t a,uint16x8_t b,uint16_t c)7923 uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
7924   return vmlsq_n_u16(a, b, c);
7925 }
7926 
7927 // CHECK-LABEL: @test_vmlsq_n_u32(
7928 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
7929 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
7930 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
7931 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
7932 // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
7933 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
7934 // CHECK:   ret <4 x i32> [[SUB_I]]
test_vmlsq_n_u32(uint32x4_t a,uint32x4_t b,uint32_t c)7935 uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
7936   return vmlsq_n_u32(a, b, c);
7937 }
7938 
7939 // CHECK-LABEL: @test_vmlsq_n_f32(
7940 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
7941 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
7942 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
7943 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
7944 // CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
7945 // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
7946 // CHECK:   ret <4 x float> [[SUB_I]]
7947 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
7948   return vmlsq_n_f32(a, b, c);
7949 }
7950 
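// The vmovl_* tests below cover lane widening: each element is sign- or
// zero-extended to twice its width, so the builtin lowers to a plain
// sext/zext on the vector type. A minimal usage sketch (static inline and
// unused, so it is not emitted and does not disturb the checks; the helper
// name is invented here):
static inline int16x8_t scale_bytes_sketch(int8x8_t v) {
  int16x8_t wide = vmovl_s8(v); // sign-extend each i8 lane to i16
  return vmulq_n_s16(wide, 3);  // scale without risking i8 overflow
}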
7951 // CHECK-LABEL: @test_vmovl_s8(
7952 // CHECK:   [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7953 // CHECK:   ret <8 x i16> [[VMOVL_I]]
7954 int16x8_t test_vmovl_s8(int8x8_t a) {
7955   return vmovl_s8(a);
7956 }
7957 
7958 // CHECK-LABEL: @test_vmovl_s16(
7959 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7960 // CHECK:   [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
7961 // CHECK:   ret <4 x i32> [[VMOVL_I]]
7962 int32x4_t test_vmovl_s16(int16x4_t a) {
7963   return vmovl_s16(a);
7964 }
7965 
7966 // CHECK-LABEL: @test_vmovl_s32(
7967 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7968 // CHECK:   [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
7969 // CHECK:   ret <2 x i64> [[VMOVL_I]]
7970 int64x2_t test_vmovl_s32(int32x2_t a) {
7971   return vmovl_s32(a);
7972 }
7973 
7974 // CHECK-LABEL: @test_vmovl_u8(
7975 // CHECK:   [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7976 // CHECK:   ret <8 x i16> [[VMOVL_I]]
7977 uint16x8_t test_vmovl_u8(uint8x8_t a) {
7978   return vmovl_u8(a);
7979 }
7980 
7981 // CHECK-LABEL: @test_vmovl_u16(
7982 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7983 // CHECK:   [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
7984 // CHECK:   ret <4 x i32> [[VMOVL_I]]
7985 uint32x4_t test_vmovl_u16(uint16x4_t a) {
7986   return vmovl_u16(a);
7987 }
7988 
7989 // CHECK-LABEL: @test_vmovl_u32(
7990 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7991 // CHECK:   [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
7992 // CHECK:   ret <2 x i64> [[VMOVL_I]]
7993 uint64x2_t test_vmovl_u32(uint32x2_t a) {
7994   return vmovl_u32(a);
7995 }
7996 
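// vmovn_* goes the other way: each lane is truncated to half width, simply
// dropping the high bits (no saturation; the vqmovn_* family saturates
// instead). A sketch under the same assumptions as above:
static inline int8x8_t narrow_sum_sketch(int16x8_t a, int16x8_t b) {
  int16x8_t sum = vaddq_s16(a, b); // add at full width first...
  return vmovn_s16(sum);           // ...then truncate each i16 lane to i8
}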
7997 // CHECK-LABEL: @test_vmovn_s16(
7998 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7999 // CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
8000 // CHECK:   ret <8 x i8> [[VMOVN_I]]
8001 int8x8_t test_vmovn_s16(int16x8_t a) {
8002   return vmovn_s16(a);
8003 }
8004 
8005 // CHECK-LABEL: @test_vmovn_s32(
8006 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8007 // CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
8008 // CHECK:   ret <4 x i16> [[VMOVN_I]]
8009 int16x4_t test_vmovn_s32(int32x4_t a) {
8010   return vmovn_s32(a);
8011 }
8012 
8013 // CHECK-LABEL: @test_vmovn_s64(
8014 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8015 // CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
8016 // CHECK:   ret <2 x i32> [[VMOVN_I]]
8017 int32x2_t test_vmovn_s64(int64x2_t a) {
8018   return vmovn_s64(a);
8019 }
8020 
8021 // CHECK-LABEL: @test_vmovn_u16(
8022 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8023 // CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
8024 // CHECK:   ret <8 x i8> [[VMOVN_I]]
8025 uint8x8_t test_vmovn_u16(uint16x8_t a) {
8026   return vmovn_u16(a);
8027 }
8028 
8029 // CHECK-LABEL: @test_vmovn_u32(
8030 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8031 // CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
8032 // CHECK:   ret <4 x i16> [[VMOVN_I]]
8033 uint16x4_t test_vmovn_u32(uint32x4_t a) {
8034   return vmovn_u32(a);
8035 }
8036 
8037 // CHECK-LABEL: @test_vmovn_u64(
8038 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8039 // CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
8040 // CHECK:   ret <2 x i32> [[VMOVN_I]]
8041 uint32x2_t test_vmovn_u64(uint64x2_t a) {
8042   return vmovn_u64(a);
8043 }
8044 
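// The vmov_n_* tests check scalar-to-vector splats: the frontend emits one
// insertelement per lane rather than a shufflevector broadcast. A sketch of a
// typical use, comparing every lane against a splatted threshold (helper name
// invented here; not FileCheck-verified):
static inline uint8x8_t threshold_sketch(uint8x8_t v, uint8_t t) {
  return vcge_u8(v, vmov_n_u8(t)); // all-ones lanes where v[i] >= t
}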
8045 // CHECK-LABEL: @test_vmov_n_u8(
8046 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8047 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8048 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8049 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8050 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8051 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8052 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8053 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8054 // CHECK:   ret <8 x i8> [[VECINIT7_I]]
8055 uint8x8_t test_vmov_n_u8(uint8_t a) {
8056   return vmov_n_u8(a);
8057 }
8058 
8059 // CHECK-LABEL: @test_vmov_n_u16(
8060 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8061 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8062 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8063 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8064 // CHECK:   ret <4 x i16> [[VECINIT3_I]]
8065 uint16x4_t test_vmov_n_u16(uint16_t a) {
8066   return vmov_n_u16(a);
8067 }
8068 
8069 // CHECK-LABEL: @test_vmov_n_u32(
8070 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
8071 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
8072 // CHECK:   ret <2 x i32> [[VECINIT1_I]]
8073 uint32x2_t test_vmov_n_u32(uint32_t a) {
8074   return vmov_n_u32(a);
8075 }
8076 
8077 // CHECK-LABEL: @test_vmov_n_s8(
8078 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8079 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8080 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8081 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8082 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8083 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8084 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8085 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8086 // CHECK:   ret <8 x i8> [[VECINIT7_I]]
8087 int8x8_t test_vmov_n_s8(int8_t a) {
8088   return vmov_n_s8(a);
8089 }
8090 
8091 // CHECK-LABEL: @test_vmov_n_s16(
8092 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8093 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8094 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8095 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8096 // CHECK:   ret <4 x i16> [[VECINIT3_I]]
8097 int16x4_t test_vmov_n_s16(int16_t a) {
8098   return vmov_n_s16(a);
8099 }
8100 
8101 // CHECK-LABEL: @test_vmov_n_s32(
8102 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
8103 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
8104 // CHECK:   ret <2 x i32> [[VECINIT1_I]]
8105 int32x2_t test_vmov_n_s32(int32_t a) {
8106   return vmov_n_s32(a);
8107 }
8108 
8109 // CHECK-LABEL: @test_vmov_n_p8(
8110 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
8111 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
8112 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
8113 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
8114 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
8115 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
8116 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
8117 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
8118 // CHECK:   ret <8 x i8> [[VECINIT7_I]]
8119 poly8x8_t test_vmov_n_p8(poly8_t a) {
8120   return vmov_n_p8(a);
8121 }
8122 
8123 // CHECK-LABEL: @test_vmov_n_p16(
8124 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
8125 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
8126 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
8127 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
8128 // CHECK:   ret <4 x i16> [[VECINIT3_I]]
8129 poly16x4_t test_vmov_n_p16(poly16_t a) {
8130   return vmov_n_p16(a);
8131 }
8132 
8133 // CHECK-LABEL: @test_vmov_n_f16(
8134 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
8135 // CHECK:   [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
8136 // CHECK:   [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
8137 // CHECK:   [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
8138 // CHECK:   [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
8139 // CHECK:   ret <4 x half> [[VECINIT3]]
8140 float16x4_t test_vmov_n_f16(float16_t *a) {
8141   return vmov_n_f16(*a);
8142 }
8143 
8144 // CHECK-LABEL: @test_vmov_n_f32(
8145 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
8146 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
8147 // CHECK:   ret <2 x float> [[VECINIT1_I]]
8148 float32x2_t test_vmov_n_f32(float32_t a) {
8149   return vmov_n_f32(a);
8150 }
8151 
8152 // CHECK-LABEL: @test_vmovq_n_u8(
8153 // CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8154 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8155 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8156 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8157 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8158 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8159 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8160 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8161 // CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8162 // CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8163 // CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8164 // CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8165 // CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8166 // CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8167 // CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8168 // CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8169 // CHECK:   ret <16 x i8> [[VECINIT15_I]]
8170 uint8x16_t test_vmovq_n_u8(uint8_t a) {
8171   return vmovq_n_u8(a);
8172 }
8173 
8174 // CHECK-LABEL: @test_vmovq_n_u16(
8175 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8176 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8177 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8178 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8179 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8180 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8181 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8182 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8183 // CHECK:   ret <8 x i16> [[VECINIT7_I]]
8184 uint16x8_t test_vmovq_n_u16(uint16_t a) {
8185   return vmovq_n_u16(a);
8186 }
8187 
8188 // CHECK-LABEL: @test_vmovq_n_u32(
8189 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
8190 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
8191 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
8192 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
8193 // CHECK:   ret <4 x i32> [[VECINIT3_I]]
8194 uint32x4_t test_vmovq_n_u32(uint32_t a) {
8195   return vmovq_n_u32(a);
8196 }
8197 
8198 // CHECK-LABEL: @test_vmovq_n_s8(
8199 // CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8200 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8201 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8202 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8203 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8204 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8205 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8206 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8207 // CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8208 // CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8209 // CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8210 // CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8211 // CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8212 // CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8213 // CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8214 // CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8215 // CHECK:   ret <16 x i8> [[VECINIT15_I]]
8216 int8x16_t test_vmovq_n_s8(int8_t a) {
8217   return vmovq_n_s8(a);
8218 }
8219 
8220 // CHECK-LABEL: @test_vmovq_n_s16(
8221 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8222 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8223 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8224 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8225 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8226 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8227 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8228 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8229 // CHECK:   ret <8 x i16> [[VECINIT7_I]]
8230 int16x8_t test_vmovq_n_s16(int16_t a) {
8231   return vmovq_n_s16(a);
8232 }
8233 
8234 // CHECK-LABEL: @test_vmovq_n_s32(
8235 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
8236 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
8237 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
8238 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
8239 // CHECK:   ret <4 x i32> [[VECINIT3_I]]
8240 int32x4_t test_vmovq_n_s32(int32_t a) {
8241   return vmovq_n_s32(a);
8242 }
8243 
8244 // CHECK-LABEL: @test_vmovq_n_p8(
8245 // CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
8246 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
8247 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
8248 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
8249 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
8250 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
8251 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
8252 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
8253 // CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
8254 // CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
8255 // CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
8256 // CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
8257 // CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
8258 // CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
8259 // CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
8260 // CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
8261 // CHECK:   ret <16 x i8> [[VECINIT15_I]]
8262 poly8x16_t test_vmovq_n_p8(poly8_t a) {
8263   return vmovq_n_p8(a);
8264 }
8265 
8266 // CHECK-LABEL: @test_vmovq_n_p16(
8267 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
8268 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
8269 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
8270 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
8271 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
8272 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
8273 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
8274 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
8275 // CHECK:   ret <8 x i16> [[VECINIT7_I]]
8276 poly16x8_t test_vmovq_n_p16(poly16_t a) {
8277   return vmovq_n_p16(a);
8278 }
8279 
8280 // CHECK-LABEL: @test_vmovq_n_f16(
8281 // CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
8282 // CHECK:   [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
8283 // CHECK:   [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
8284 // CHECK:   [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
8285 // CHECK:   [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
8286 // CHECK:   [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
8287 // CHECK:   [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
8288 // CHECK:   [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
8289 // CHECK:   [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
8290 // CHECK:   ret <8 x half> [[VECINIT7]]
8291 float16x8_t test_vmovq_n_f16(float16_t *a) {
8292   return vmovq_n_f16(*a);
8293 }
8294 
8295 // CHECK-LABEL: @test_vmovq_n_f32(
8296 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
8297 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
8298 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
8299 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
8300 // CHECK:   ret <4 x float> [[VECINIT3_I]]
8301 float32x4_t test_vmovq_n_f32(float32_t a) {
8302   return vmovq_n_f32(a);
8303 }
8304 
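// For the single-lane 64-bit splats below, the tests add the result to itself
// before returning; presumably this keeps the value live in vector form so
// the insertelement is still visible to the checks after mem2reg.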
8305 // CHECK-LABEL: @test_vmov_n_s64(
8306 // CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
8307 // CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
8308 // CHECK:   ret <1 x i64> [[ADD_I]]
8309 int64x1_t test_vmov_n_s64(int64_t a) {
8310   int64x1_t tmp = vmov_n_s64(a);
8311   return vadd_s64(tmp, tmp);
8312 }
8313 
8314 // CHECK-LABEL: @test_vmov_n_u64(
8315 // CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
8316 // CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
8317 // CHECK:   ret <1 x i64> [[ADD_I]]
8318 uint64x1_t test_vmov_n_u64(uint64_t a) {
8319   uint64x1_t tmp = vmov_n_u64(a);
8320   return vadd_u64(tmp, tmp);
8321 }
8322 
8323 // CHECK-LABEL: @test_vmovq_n_s64(
8324 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
8325 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
8326 // CHECK:   ret <2 x i64> [[VECINIT1_I]]
8327 int64x2_t test_vmovq_n_s64(int64_t a) {
8328   return vmovq_n_s64(a);
8329 }
8330 
8331 // CHECK-LABEL: @test_vmovq_n_u64(
8332 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
8333 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
8334 // CHECK:   ret <2 x i64> [[VECINIT1_I]]
8335 uint64x2_t test_vmovq_n_u64(uint64_t a) {
8336   return vmovq_n_u64(a);
8337 }
8338 
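// Plain vmul_*/vmulq_* multiplies lane-by-lane at the element width, so
// products that do not fit simply wrap; the widening vmull_* forms (tested
// further below) are the ones to reach for when the full product is needed.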
8339 // CHECK-LABEL: @test_vmul_s8(
8340 // CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %a, %b
8341 // CHECK:   ret <8 x i8> [[MUL_I]]
8342 int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
8343   return vmul_s8(a, b);
8344 }
8345 
8346 // CHECK-LABEL: @test_vmul_s16(
8347 // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, %b
8348 // CHECK:   ret <4 x i16> [[MUL_I]]
8349 int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
8350   return vmul_s16(a, b);
8351 }
8352 
8353 // CHECK-LABEL: @test_vmul_s32(
8354 // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, %b
8355 // CHECK:   ret <2 x i32> [[MUL_I]]
8356 int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
8357   return vmul_s32(a, b);
8358 }
8359 
8360 // CHECK-LABEL: @test_vmul_f32(
8361 // CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %a, %b
8362 // CHECK:   ret <2 x float> [[MUL_I]]
8363 float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
8364   return vmul_f32(a, b);
8365 }
8366 
8367 // CHECK-LABEL: @test_vmul_u8(
8368 // CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %a, %b
8369 // CHECK:   ret <8 x i8> [[MUL_I]]
8370 uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
8371   return vmul_u8(a, b);
8372 }
8373 
8374 // CHECK-LABEL: @test_vmul_u16(
8375 // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, %b
8376 // CHECK:   ret <4 x i16> [[MUL_I]]
8377 uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
8378   return vmul_u16(a, b);
8379 }
8380 
8381 // CHECK-LABEL: @test_vmul_u32(
8382 // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, %b
8383 // CHECK:   ret <2 x i32> [[MUL_I]]
8384 uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
8385   return vmul_u32(a, b);
8386 }
8387 
8388 // CHECK-LABEL: @test_vmulq_s8(
8389 // CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %a, %b
8390 // CHECK:   ret <16 x i8> [[MUL_I]]
8391 int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
8392   return vmulq_s8(a, b);
8393 }
8394 
8395 // CHECK-LABEL: @test_vmulq_s16(
8396 // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, %b
8397 // CHECK:   ret <8 x i16> [[MUL_I]]
8398 int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
8399   return vmulq_s16(a, b);
8400 }
8401 
8402 // CHECK-LABEL: @test_vmulq_s32(
8403 // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, %b
8404 // CHECK:   ret <4 x i32> [[MUL_I]]
8405 int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
8406   return vmulq_s32(a, b);
8407 }
8408 
8409 // CHECK-LABEL: @test_vmulq_f32(
8410 // CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %a, %b
8411 // CHECK:   ret <4 x float> [[MUL_I]]
8412 float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
8413   return vmulq_f32(a, b);
8414 }
8415 
8416 // CHECK-LABEL: @test_vmulq_u8(
8417 // CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %a, %b
8418 // CHECK:   ret <16 x i8> [[MUL_I]]
8419 uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
8420   return vmulq_u8(a, b);
8421 }
8422 
8423 // CHECK-LABEL: @test_vmulq_u16(
8424 // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, %b
8425 // CHECK:   ret <8 x i16> [[MUL_I]]
8426 uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
8427   return vmulq_u16(a, b);
8428 }
8429 
8430 // CHECK-LABEL: @test_vmulq_u32(
8431 // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, %b
8432 // CHECK:   ret <4 x i32> [[MUL_I]]
8433 uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
8434   return vmulq_u32(a, b);
8435 }
8436 
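// vmull_* produces the exact double-width product of each lane pair, lowering
// to the llvm.arm.neon.vmulls/vmullu intrinsics. A dot-product style sketch
// (static inline and unused so the checks are unaffected; names invented):
static inline int32_t dot4_sketch(int16x4_t a, int16x4_t b) {
  int32x4_t prod = vmull_s16(a, b);              // exact 32-bit products
  int32x2_t sum2 = vadd_s32(vget_low_s32(prod),  // reduce four lanes to two
                            vget_high_s32(prod));
  return vget_lane_s32(vpadd_s32(sum2, sum2), 0); // final pairwise add
}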
8437 // CHECK-LABEL: @test_vmull_s8(
8438 // CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
8439 // CHECK:   ret <8 x i16> [[VMULL_I]]
8440 int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
8441   return vmull_s8(a, b);
8442 }
8443 
8444 // CHECK-LABEL: @test_vmull_s16(
8445 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8446 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8447 // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
8448 // CHECK:   ret <4 x i32> [[VMULL2_I]]
8449 int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
8450   return vmull_s16(a, b);
8451 }
8452 
8453 // CHECK-LABEL: @test_vmull_s32(
8454 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8455 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8456 // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
8457 // CHECK:   ret <2 x i64> [[VMULL2_I]]
8458 int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
8459   return vmull_s32(a, b);
8460 }
8461 
8462 // CHECK-LABEL: @test_vmull_u8(
8463 // CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
8464 // CHECK:   ret <8 x i16> [[VMULL_I]]
8465 uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
8466   return vmull_u8(a, b);
8467 }
8468 
8469 // CHECK-LABEL: @test_vmull_u16(
8470 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8471 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8472 // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
8473 // CHECK:   ret <4 x i32> [[VMULL2_I]]
8474 uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
8475   return vmull_u16(a, b);
8476 }
8477 
8478 // CHECK-LABEL: @test_vmull_u32(
8479 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8480 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8481 // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
8482 // CHECK:   ret <2 x i64> [[VMULL2_I]]
8483 uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
8484   return vmull_u32(a, b);
8485 }
8486 
8487 // CHECK-LABEL: @test_vmull_p8(
8488 // CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
8489 // CHECK:   ret <8 x i16> [[VMULL_I]]
8490 poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
8491   return vmull_p8(a, b);
8492 }
8493 
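// The _lane variants broadcast one element of the second operand (the
// shufflevector in the checks below) before the widening multiply, e.g.
// vmull_lane_s16(a, b, 3) multiplies every lane of a by b[3].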
8494 // CHECK-LABEL: @test_vmull_lane_s16(
8495 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8496 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8497 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8498 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
8499 // CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
8500 // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
8501 // CHECK:   ret <4 x i32> [[VMULL2_I]]
8502 int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
8503   return vmull_lane_s16(a, b, 3);
8504 }
8505 
8506 // CHECK-LABEL: @test_vmull_lane_s32(
8507 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8508 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8509 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8510 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
8511 // CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
8512 // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
8513 // CHECK:   ret <2 x i64> [[VMULL2_I]]
8514 int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
8515   return vmull_lane_s32(a, b, 1);
8516 }
8517 
8518 // CHECK-LABEL: @test_vmull_lane_u16(
8519 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8520 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8521 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8522 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
8523 // CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
8524 // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
8525 // CHECK:   ret <4 x i32> [[VMULL2_I]]
8526 uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
8527   return vmull_lane_u16(a, b, 3);
8528 }
8529 
8530 // CHECK-LABEL: @test_vmull_lane_u32(
8531 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8532 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8533 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8534 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
8535 // CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
8536 // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
8537 // CHECK:   ret <2 x i64> [[VMULL2_I]]
8538 uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
8539   return vmull_lane_u32(a, b, 1);
8540 }
8541 
8542 // CHECK-LABEL: @test_vmull_n_s16(
8543 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8544 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8545 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8546 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8547 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8548 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8549 // CHECK:   [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8550 // CHECK:   ret <4 x i32> [[VMULL5_I]]
8551 int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
8552   return vmull_n_s16(a, b);
8553 }
8554 
8555 // CHECK-LABEL: @test_vmull_n_s32(
8556 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8557 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8558 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8559 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8560 // CHECK:   [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8561 // CHECK:   ret <2 x i64> [[VMULL3_I]]
8562 int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
8563   return vmull_n_s32(a, b);
8564 }
8565 
8566 // CHECK-LABEL: @test_vmull_n_u16(
8567 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8568 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8569 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8570 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8571 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8572 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8573 // CHECK:   [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8574 // CHECK:   ret <4 x i32> [[VMULL5_I]]
8575 uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
8576   return vmull_n_u16(a, b);
8577 }
8578 
8579 // CHECK-LABEL: @test_vmull_n_u32(
8580 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8581 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8582 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8583 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8584 // CHECK:   [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8585 // CHECK:   ret <2 x i64> [[VMULL3_I]]
8586 uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
8587   return vmull_n_u32(a, b);
8588 }
8589 
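// vmul_p8/vmulq_p8 are polynomial (carry-less) multiplies over GF(2): lanes
// are multiplied as bit polynomials and truncated to 8 bits, which is why
// they lower to a dedicated vmulp intrinsic instead of an IR mul.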
8590 // CHECK-LABEL: @test_vmul_p8(
8591 // CHECK:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b)
8592 // CHECK:   ret <8 x i8> [[VMUL_V_I]]
8593 poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
8594   return vmul_p8(a, b);
8595 }
8596 
8597 // CHECK-LABEL: @test_vmulq_p8(
8598 // CHECK:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b)
8599 // CHECK:   ret <16 x i8> [[VMULQ_V_I]]
8600 poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
8601   return vmulq_p8(a, b);
8602 }
8603 
8604 // CHECK-LABEL: @test_vmul_lane_s16(
8605 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8606 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8607 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8608 // CHECK:   [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
8609 // CHECK:   ret <4 x i16> [[MUL]]
8610 int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
8611   return vmul_lane_s16(a, b, 3);
8612 }
8613 
8614 // CHECK-LABEL: @test_vmul_lane_s32(
8615 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8616 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8617 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8618 // CHECK:   [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
8619 // CHECK:   ret <2 x i32> [[MUL]]
8620 int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
8621   return vmul_lane_s32(a, b, 1);
8622 }
8623 
8624 // CHECK-LABEL: @test_vmul_lane_f32(
8625 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
8626 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
8627 // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
8628 // CHECK:   [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
8629 // CHECK:   ret <2 x float> [[MUL]]
8630 float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
8631   return vmul_lane_f32(a, b, 1);
8632 }
8633 
8634 // CHECK-LABEL: @test_vmul_lane_u16(
8635 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8636 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8637 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
8638 // CHECK:   [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
8639 // CHECK:   ret <4 x i16> [[MUL]]
8640 uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
8641   return vmul_lane_u16(a, b, 3);
8642 }
8643 
8644 // CHECK-LABEL: @test_vmul_lane_u32(
8645 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8646 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8647 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
8648 // CHECK:   [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
8649 // CHECK:   ret <2 x i32> [[MUL]]
8650 uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
8651   return vmul_lane_u32(a, b, 1);
8652 }
8653 
8654 // CHECK-LABEL: @test_vmulq_lane_s16(
8655 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8656 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8657 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
8658 // CHECK:   [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
8659 // CHECK:   ret <8 x i16> [[MUL]]
8660 int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
8661   return vmulq_lane_s16(a, b, 3);
8662 }
8663 
8664 // CHECK-LABEL: @test_vmulq_lane_s32(
8665 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8666 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8667 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8668 // CHECK:   [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
8669 // CHECK:   ret <4 x i32> [[MUL]]
8670 int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
8671   return vmulq_lane_s32(a, b, 1);
8672 }
8673 
8674 // CHECK-LABEL: @test_vmulq_lane_f32(
8675 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
8676 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
8677 // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8678 // CHECK:   [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
8679 // CHECK:   ret <4 x float> [[MUL]]
8680 float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
8681   return vmulq_lane_f32(a, b, 1);
8682 }
8683 
8684 // CHECK-LABEL: @test_vmulq_lane_u16(
8685 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
8686 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8687 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
8688 // CHECK:   [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
8689 // CHECK:   ret <8 x i16> [[MUL]]
8690 uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
8691   return vmulq_lane_u16(a, b, 3);
8692 }
8693 
8694 // CHECK-LABEL: @test_vmulq_lane_u32(
8695 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
8696 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8697 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
8698 // CHECK:   [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
8699 // CHECK:   ret <4 x i32> [[MUL]]
8700 uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
8701   return vmulq_lane_u32(a, b, 1);
8702 }
8703 
8704 // CHECK-LABEL: @test_vmul_n_s16(
8705 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8706 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8707 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8708 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8709 // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
8710 // CHECK:   ret <4 x i16> [[MUL_I]]
8711 int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
8712   return vmul_n_s16(a, b);
8713 }
8714 
8715 // CHECK-LABEL: @test_vmul_n_s32(
8716 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8717 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8718 // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
8719 // CHECK:   ret <2 x i32> [[MUL_I]]
8720 int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
8721   return vmul_n_s32(a, b);
8722 }
8723 
8724 // CHECK-LABEL: @test_vmul_n_f32(
8725 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
8726 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
8727 // CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
8728 // CHECK:   ret <2 x float> [[MUL_I]]
8729 float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
8730   return vmul_n_f32(a, b);
8731 }
8732 
8733 // CHECK-LABEL: @test_vmul_n_u16(
8734 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8735 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8736 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8737 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8738 // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
8739 // CHECK:   ret <4 x i16> [[MUL_I]]
8740 uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
8741   return vmul_n_u16(a, b);
8742 }
8743 
8744 // CHECK-LABEL: @test_vmul_n_u32(
8745 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8746 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8747 // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
8748 // CHECK:   ret <2 x i32> [[MUL_I]]
8749 uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
8750   return vmul_n_u32(a, b);
8751 }
8752 
8753 // CHECK-LABEL: @test_vmulq_n_s16(
8754 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
8755 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
8756 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
8757 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
8758 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
8759 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
8760 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
8761 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
8762 // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
8763 // CHECK:   ret <8 x i16> [[MUL_I]]
8764 int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
8765   return vmulq_n_s16(a, b);
8766 }
8767 
8768 // CHECK-LABEL: @test_vmulq_n_s32(
8769 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
8770 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
8771 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
8772 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
8773 // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
8774 // CHECK:   ret <4 x i32> [[MUL_I]]
8775 int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
8776   return vmulq_n_s32(a, b);
8777 }
8778 
8779 // CHECK-LABEL: @test_vmulq_n_f32(
8780 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
8781 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
8782 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
8783 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
8784 // CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
8785 // CHECK:   ret <4 x float> [[MUL_I]]
8786 float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
8787   return vmulq_n_f32(a, b);
8788 }
8789 
8790 // CHECK-LABEL: @test_vmulq_n_u16(
8791 // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
8792 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
8793 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
8794 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
8795 // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
8796 // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
8797 // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
8798 // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
8799 // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
8800 // CHECK:   ret <8 x i16> [[MUL_I]]
8801 uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
8802   return vmulq_n_u16(a, b);
8803 }
8804 
8805 // CHECK-LABEL: @test_vmulq_n_u32(
8806 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
8807 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
8808 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
8809 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
8810 // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
8811 // CHECK:   ret <4 x i32> [[MUL_I]]
8812 uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
8813   return vmulq_n_u32(a, b);
8814 }
8815 
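// vmvn_* is bitwise NOT; LLVM IR has no dedicated not instruction, so the
// frontend emits an xor with an all-ones splat, as the checks below expect.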
8816 // CHECK-LABEL: @test_vmvn_s8(
8817 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8818 // CHECK:   ret <8 x i8> [[NEG_I]]
8819 int8x8_t test_vmvn_s8(int8x8_t a) {
8820   return vmvn_s8(a);
8821 }
8822 
8823 // CHECK-LABEL: @test_vmvn_s16(
8824 // CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
8825 // CHECK:   ret <4 x i16> [[NEG_I]]
8826 int16x4_t test_vmvn_s16(int16x4_t a) {
8827   return vmvn_s16(a);
8828 }
8829 
8830 // CHECK-LABEL: @test_vmvn_s32(
8831 // CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
8832 // CHECK:   ret <2 x i32> [[NEG_I]]
8833 int32x2_t test_vmvn_s32(int32x2_t a) {
8834   return vmvn_s32(a);
8835 }
8836 
8837 // CHECK-LABEL: @test_vmvn_u8(
8838 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8839 // CHECK:   ret <8 x i8> [[NEG_I]]
8840 uint8x8_t test_vmvn_u8(uint8x8_t a) {
8841   return vmvn_u8(a);
8842 }
8843 
8844 // CHECK-LABEL: @test_vmvn_u16(
8845 // CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
8846 // CHECK:   ret <4 x i16> [[NEG_I]]
8847 uint16x4_t test_vmvn_u16(uint16x4_t a) {
8848   return vmvn_u16(a);
8849 }
8850 
8851 // CHECK-LABEL: @test_vmvn_u32(
8852 // CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
8853 // CHECK:   ret <2 x i32> [[NEG_I]]
8854 uint32x2_t test_vmvn_u32(uint32x2_t a) {
8855   return vmvn_u32(a);
8856 }
8857 
8858 // CHECK-LABEL: @test_vmvn_p8(
8859 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8860 // CHECK:   ret <8 x i8> [[NEG_I]]
8861 poly8x8_t test_vmvn_p8(poly8x8_t a) {
8862   return vmvn_p8(a);
8863 }
8864 
8865 // CHECK-LABEL: @test_vmvnq_s8(
8866 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8867 // CHECK:   ret <16 x i8> [[NEG_I]]
8868 int8x16_t test_vmvnq_s8(int8x16_t a) {
8869   return vmvnq_s8(a);
8870 }
8871 
8872 // CHECK-LABEL: @test_vmvnq_s16(
8873 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
8874 // CHECK:   ret <8 x i16> [[NEG_I]]
8875 int16x8_t test_vmvnq_s16(int16x8_t a) {
8876   return vmvnq_s16(a);
8877 }
8878 
8879 // CHECK-LABEL: @test_vmvnq_s32(
8880 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
8881 // CHECK:   ret <4 x i32> [[NEG_I]]
8882 int32x4_t test_vmvnq_s32(int32x4_t a) {
8883   return vmvnq_s32(a);
8884 }
8885 
8886 // CHECK-LABEL: @test_vmvnq_u8(
8887 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8888 // CHECK:   ret <16 x i8> [[NEG_I]]
8889 uint8x16_t test_vmvnq_u8(uint8x16_t a) {
8890   return vmvnq_u8(a);
8891 }
8892 
8893 // CHECK-LABEL: @test_vmvnq_u16(
8894 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
8895 // CHECK:   ret <8 x i16> [[NEG_I]]
8896 uint16x8_t test_vmvnq_u16(uint16x8_t a) {
8897   return vmvnq_u16(a);
8898 }
8899 
8900 // CHECK-LABEL: @test_vmvnq_u32(
8901 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
8902 // CHECK:   ret <4 x i32> [[NEG_I]]
8903 uint32x4_t test_vmvnq_u32(uint32x4_t a) {
8904   return vmvnq_u32(a);
8905 }
8906 
8907 // CHECK-LABEL: @test_vmvnq_p8(
8908 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8909 // CHECK:   ret <16 x i8> [[NEG_I]]
8910 poly8x16_t test_vmvnq_p8(poly8x16_t a) {
8911   return vmvnq_p8(a);
8912 }
8913 
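// vneg_* negates each lane: integer variants as a subtraction from zero,
// floating-point variants as the dedicated fneg instruction, which only flips
// the sign bit and so treats zeros and NaNs per IEEE negate semantics.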
8914 // CHECK-LABEL: @test_vneg_s8(
8915 // CHECK:   [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
8916 // CHECK:   ret <8 x i8> [[SUB_I]]
8917 int8x8_t test_vneg_s8(int8x8_t a) {
8918   return vneg_s8(a);
8919 }
8920 
8921 // CHECK-LABEL: @test_vneg_s16(
8922 // CHECK:   [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
8923 // CHECK:   ret <4 x i16> [[SUB_I]]
8924 int16x4_t test_vneg_s16(int16x4_t a) {
8925   return vneg_s16(a);
8926 }
8927 
8928 // CHECK-LABEL: @test_vneg_s32(
8929 // CHECK:   [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
8930 // CHECK:   ret <2 x i32> [[SUB_I]]
8931 int32x2_t test_vneg_s32(int32x2_t a) {
8932   return vneg_s32(a);
8933 }
8934 
8935 // CHECK-LABEL: @test_vneg_f32(
8936 // CHECK:   [[SUB_I:%.*]] = fneg <2 x float> %a
8937 // CHECK:   ret <2 x float> [[SUB_I]]
8938 float32x2_t test_vneg_f32(float32x2_t a) {
8939   return vneg_f32(a);
8940 }
8941 
8942 // CHECK-LABEL: @test_vnegq_s8(
8943 // CHECK:   [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
8944 // CHECK:   ret <16 x i8> [[SUB_I]]
8945 int8x16_t test_vnegq_s8(int8x16_t a) {
8946   return vnegq_s8(a);
8947 }
8948 
8949 // CHECK-LABEL: @test_vnegq_s16(
8950 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
8951 // CHECK:   ret <8 x i16> [[SUB_I]]
8952 int16x8_t test_vnegq_s16(int16x8_t a) {
8953   return vnegq_s16(a);
8954 }
8955 
8956 // CHECK-LABEL: @test_vnegq_s32(
8957 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
8958 // CHECK:   ret <4 x i32> [[SUB_I]]
8959 int32x4_t test_vnegq_s32(int32x4_t a) {
8960   return vnegq_s32(a);
8961 }
8962 
8963 // CHECK-LABEL: @test_vnegq_f32(
8964 // CHECK:   [[SUB_I:%.*]] = fneg <4 x float> %a
8965 // CHECK:   ret <4 x float> [[SUB_I]]
8966 float32x4_t test_vnegq_f32(float32x4_t a) {
8967   return vnegq_f32(a);
8968 }
8969 
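// vorn_* computes a | ~b (OR-NOT), emitted as an xor-with-ones feeding an or.
// A sketch of merging under an inverted mask (helper invented here, unused):
static inline uint8x8_t keep_unmasked_sketch(uint8x8_t v, uint8x8_t mask) {
  return vorn_u8(v, mask); // v's bits, plus every bit that is clear in mask
}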
8970 // CHECK-LABEL: @test_vorn_s8(
8971 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
8972 // CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
8973 // CHECK:   ret <8 x i8> [[OR_I]]
8974 int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
8975   return vorn_s8(a, b);
8976 }
8977 
8978 // CHECK-LABEL: @test_vorn_s16(
8979 // CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
8980 // CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
8981 // CHECK:   ret <4 x i16> [[OR_I]]
8982 int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
8983   return vorn_s16(a, b);
8984 }
8985 
8986 // CHECK-LABEL: @test_vorn_s32(
8987 // CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
8988 // CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
8989 // CHECK:   ret <2 x i32> [[OR_I]]
8990 int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
8991   return vorn_s32(a, b);
8992 }
8993 
8994 // CHECK-LABEL: @test_vorn_s64(
8995 // CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
8996 // CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
8997 // CHECK:   ret <1 x i64> [[OR_I]]
8998 int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
8999   return vorn_s64(a, b);
9000 }
9001 
9002 // CHECK-LABEL: @test_vorn_u8(
9003 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9004 // CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
9005 // CHECK:   ret <8 x i8> [[OR_I]]
9006 uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
9007   return vorn_u8(a, b);
9008 }
9009 
9010 // CHECK-LABEL: @test_vorn_u16(
9011 // CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
9012 // CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
9013 // CHECK:   ret <4 x i16> [[OR_I]]
9014 uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
9015   return vorn_u16(a, b);
9016 }
9017 
9018 // CHECK-LABEL: @test_vorn_u32(
9019 // CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
9020 // CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
9021 // CHECK:   ret <2 x i32> [[OR_I]]
test_vorn_u32(uint32x2_t a,uint32x2_t b)9022 uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
9023   return vorn_u32(a, b);
9024 }
9025 
9026 // CHECK-LABEL: @test_vorn_u64(
9027 // CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
9028 // CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
9029 // CHECK:   ret <1 x i64> [[OR_I]]
test_vorn_u64(uint64x1_t a,uint64x1_t b)9030 uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
9031   return vorn_u64(a, b);
9032 }
9033 
9034 // CHECK-LABEL: @test_vornq_s8(
9035 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9036 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
9037 // CHECK:   ret <16 x i8> [[OR_I]]
test_vornq_s8(int8x16_t a,int8x16_t b)9038 int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
9039   return vornq_s8(a, b);
9040 }
9041 
9042 // CHECK-LABEL: @test_vornq_s16(
9043 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
9044 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
9045 // CHECK:   ret <8 x i16> [[OR_I]]
test_vornq_s16(int16x8_t a,int16x8_t b)9046 int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
9047   return vornq_s16(a, b);
9048 }
9049 
9050 // CHECK-LABEL: @test_vornq_s32(
9051 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
9052 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
9053 // CHECK:   ret <4 x i32> [[OR_I]]
test_vornq_s32(int32x4_t a,int32x4_t b)9054 int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
9055   return vornq_s32(a, b);
9056 }
9057 
9058 // CHECK-LABEL: @test_vornq_s64(
9059 // CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
9060 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
9061 // CHECK:   ret <2 x i64> [[OR_I]]
test_vornq_s64(int64x2_t a,int64x2_t b)9062 int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
9063   return vornq_s64(a, b);
9064 }
9065 
9066 // CHECK-LABEL: @test_vornq_u8(
9067 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
9068 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
9069 // CHECK:   ret <16 x i8> [[OR_I]]
test_vornq_u8(uint8x16_t a,uint8x16_t b)9070 uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
9071   return vornq_u8(a, b);
9072 }
9073 
9074 // CHECK-LABEL: @test_vornq_u16(
9075 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
9076 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
9077 // CHECK:   ret <8 x i16> [[OR_I]]
test_vornq_u16(uint16x8_t a,uint16x8_t b)9078 uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
9079   return vornq_u16(a, b);
9080 }
9081 
9082 // CHECK-LABEL: @test_vornq_u32(
9083 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
9084 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
9085 // CHECK:   ret <4 x i32> [[OR_I]]
test_vornq_u32(uint32x4_t a,uint32x4_t b)9086 uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
9087   return vornq_u32(a, b);
9088 }
9089 
9090 // CHECK-LABEL: @test_vornq_u64(
9091 // CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
9092 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
9093 // CHECK:   ret <2 x i64> [[OR_I]]
test_vornq_u64(uint64x2_t a,uint64x2_t b)9094 uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
9095   return vornq_u64(a, b);
9096 }
9097 
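// vorr/vorrq (bitwise OR): expected to lower to a single generic or instruction.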
// CHECK-LABEL: @test_vorr_s8(
// CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[OR_I]]
int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
  return vorr_s8(a, b);
}

// CHECK-LABEL: @test_vorr_s16(
// CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[OR_I]]
int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
  return vorr_s16(a, b);
}

// CHECK-LABEL: @test_vorr_s32(
// CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[OR_I]]
int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
  return vorr_s32(a, b);
}

// CHECK-LABEL: @test_vorr_s64(
// CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[OR_I]]
int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
  return vorr_s64(a, b);
}

// CHECK-LABEL: @test_vorr_u8(
// CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[OR_I]]
uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
  return vorr_u8(a, b);
}

// CHECK-LABEL: @test_vorr_u16(
// CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[OR_I]]
uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
  return vorr_u16(a, b);
}

// CHECK-LABEL: @test_vorr_u32(
// CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[OR_I]]
uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
  return vorr_u32(a, b);
}

// CHECK-LABEL: @test_vorr_u64(
// CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[OR_I]]
uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
  return vorr_u64(a, b);
}

// CHECK-LABEL: @test_vorrq_s8(
// CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[OR_I]]
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
  return vorrq_s8(a, b);
}

// CHECK-LABEL: @test_vorrq_s16(
// CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[OR_I]]
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
  return vorrq_s16(a, b);
}

// CHECK-LABEL: @test_vorrq_s32(
// CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[OR_I]]
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
  return vorrq_s32(a, b);
}

// CHECK-LABEL: @test_vorrq_s64(
// CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[OR_I]]
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
  return vorrq_s64(a, b);
}

// CHECK-LABEL: @test_vorrq_u8(
// CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[OR_I]]
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
  return vorrq_u8(a, b);
}

// CHECK-LABEL: @test_vorrq_u16(
// CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[OR_I]]
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
  return vorrq_u16(a, b);
}

// CHECK-LABEL: @test_vorrq_u32(
// CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[OR_I]]
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
  return vorrq_u32(a, b);
}

// CHECK-LABEL: @test_vorrq_u64(
// CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[OR_I]]
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
  return vorrq_u64(a, b);
}

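// vpadal/vpadalq (pairwise add and accumulate long): mapped to the
// @llvm.arm.neon.vpadals (signed) and @llvm.arm.neon.vpadalu (unsigned) intrinsics.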
// CHECK-LABEL: @test_vpadal_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
// CHECK:   ret <4 x i16> [[VPADAL_V1_I]]
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
  return vpadal_s8(a, b);
}

// CHECK-LABEL: @test_vpadal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
// CHECK:   ret <2 x i32> [[VPADAL_V2_I]]
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
  return vpadal_s16(a, b);
}

// CHECK-LABEL: @test_vpadal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
// CHECK:   ret <1 x i64> [[VPADAL_V2_I]]
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
  return vpadal_s32(a, b);
}

// CHECK-LABEL: @test_vpadal_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
// CHECK:   ret <4 x i16> [[VPADAL_V1_I]]
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
  return vpadal_u8(a, b);
}

// CHECK-LABEL: @test_vpadal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
// CHECK:   ret <2 x i32> [[VPADAL_V2_I]]
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
  return vpadal_u16(a, b);
}

// CHECK-LABEL: @test_vpadal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
// CHECK:   ret <1 x i64> [[VPADAL_V2_I]]
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
  return vpadal_u32(a, b);
}

// CHECK-LABEL: @test_vpadalq_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK:   ret <8 x i16> [[VPADALQ_V1_I]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: @test_vpadalq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK:   ret <4 x i32> [[VPADALQ_V2_I]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: @test_vpadalq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK:   ret <2 x i64> [[VPADALQ_V2_I]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: @test_vpadalq_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK:   ret <8 x i16> [[VPADALQ_V1_I]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: @test_vpadalq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK:   ret <4 x i32> [[VPADALQ_V2_I]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: @test_vpadalq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK:   ret <2 x i64> [[VPADALQ_V2_I]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}

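// vpadd (pairwise add of adjacent lanes): mapped to @llvm.arm.neon.vpadd.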
// CHECK-LABEL: @test_vpadd_s8(
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}

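// vpaddl/vpaddlq (pairwise add long, widening adjacent lanes): mapped to
// @llvm.arm.neon.vpaddls/@llvm.arm.neon.vpaddlu.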
// CHECK-LABEL: @test_vpaddl_s8(
// CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a)
// CHECK:   ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: @test_vpaddl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a)
// CHECK:   ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: @test_vpaddl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a)
// CHECK:   ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: @test_vpaddl_u8(
// CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a)
// CHECK:   ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: @test_vpaddl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a)
// CHECK:   ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: @test_vpaddl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a)
// CHECK:   ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}

// CHECK-LABEL: @test_vpaddlq_s8(
// CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a)
// CHECK:   ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: @test_vpaddlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a)
// CHECK:   ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: @test_vpaddlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a)
// CHECK:   ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: @test_vpaddlq_u8(
// CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a)
// CHECK:   ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: @test_vpaddlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a)
// CHECK:   ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: @test_vpaddlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a)
// CHECK:   ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}

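// vpmax (pairwise maximum): mapped to @llvm.arm.neon.vpmaxs/@llvm.arm.neon.vpmaxu.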
// CHECK-LABEL: @test_vpmax_s8(
// CHECK:   [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_V_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMAX_V2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMAX_V2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK:   [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_V_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMAX_V2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMAX_V2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VPMAX_V2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

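// vpmin (pairwise minimum): mapped to @llvm.arm.neon.vpmins/@llvm.arm.neon.vpminu.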
// CHECK-LABEL: @test_vpmin_s8(
// CHECK:   [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_V_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMIN_V2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMIN_V2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK:   [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_V_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMIN_V2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMIN_V2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VPMIN_V2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

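// vqabs/vqabsq (saturating absolute value, signed types only): mapped to
// @llvm.arm.neon.vqabs.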
// CHECK-LABEL: @test_vqabs_s8(
// CHECK:   [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: @test_vqabs_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a)
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQABS_V1_I]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: @test_vqabs_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a)
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQABS_V1_I]]
int32x2_t test_vqabs_s32(int32x2_t a) {
  return vqabs_s32(a);
}

// CHECK-LABEL: @test_vqabsq_s8(
// CHECK:   [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: @test_vqabsq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a)
// CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQABSQ_V1_I]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}

// CHECK-LABEL: @test_vqabsq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a)
// CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQABSQ_V1_I]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
  return vqabsq_s32(a);
}

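// vqadd/vqaddq (saturating add): lowered to the target-independent
// @llvm.sadd.sat/@llvm.uadd.sat intrinsics rather than an ARM-specific one.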
// CHECK-LABEL: @test_vqadd_s8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddq_s8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

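// vqdmlal (saturating doubling multiply-accumulate long): emitted as
// @llvm.arm.neon.vqdmull followed by a saturating add (@llvm.sadd.sat).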
// CHECK-LABEL: @test_vqdmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) #8
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vqdmlal_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) #8
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vqdmlal_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlal_n_s32(a, b, c);
}

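// vqdmlsl (saturating doubling multiply-subtract long): emitted as
// @llvm.arm.neon.vqdmull followed by a saturating subtract (@llvm.ssub.sat).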
// CHECK-LABEL: @test_vqdmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) #8
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) #8
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vqdmlsl_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) #8
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) #8
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vqdmlsl_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V6_I]]
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V4_I]]
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlsl_n_s32(a, b, c);
}

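// vqdmulh (saturating doubling multiply returning high half): mapped to
// @llvm.arm.neon.vqdmulh; the _lane and _n forms splat the selected lane or
// scalar before the call.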
9952 // CHECK-LABEL: @test_vqdmulh_s16(
9953 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
9954 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9955 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
9956 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
9957 // CHECK:   ret <4 x i16> [[VQDMULH_V2_I]]
test_vqdmulh_s16(int16x4_t a,int16x4_t b)9958 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
9959   return vqdmulh_s16(a, b);
9960 }
9961 
9962 // CHECK-LABEL: @test_vqdmulh_s32(
9963 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
9964 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9965 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
9966 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
9967 // CHECK:   ret <2 x i32> [[VQDMULH_V2_I]]
test_vqdmulh_s32(int32x2_t a,int32x2_t b)9968 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
9969   return vqdmulh_s32(a, b);
9970 }
9971 
9972 // CHECK-LABEL: @test_vqdmulhq_s16(
9973 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
9974 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
9975 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
9976 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
9977 // CHECK:   ret <8 x i16> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s16(int16x8_t a,int16x8_t b)9978 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
9979   return vqdmulhq_s16(a, b);
9980 }
9981 
9982 // CHECK-LABEL: @test_vqdmulhq_s32(
9983 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9984 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
9985 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
9986 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
9987 // CHECK:   ret <4 x i32> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s32(int32x4_t a,int32x4_t b)9988 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
9989   return vqdmulhq_s32(a, b);
9990 }
9991 
9992 // CHECK-LABEL: @test_vqdmulh_lane_s16(
9993 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
9994 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9995 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
9996 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
9997 // CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
9998 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
9999 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
10000 // CHECK:   ret <4 x i16> [[VQDMULH_V2_I]]
test_vqdmulh_lane_s16(int16x4_t a,int16x4_t b)10001 int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
10002   return vqdmulh_lane_s16(a, b, 3);
10003 }
10004 
10005 // CHECK-LABEL: @test_vqdmulh_lane_s32(
10006 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
10007 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10008 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
10009 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
10010 // CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
10011 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
10012 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
10013 // CHECK:   ret <2 x i32> [[VQDMULH_V2_I]]
test_vqdmulh_lane_s32(int32x2_t a,int32x2_t b)10014 int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
10015   return vqdmulh_lane_s32(a, b, 1);
10016 }
10017 
10018 // CHECK-LABEL: @test_vqdmulhq_lane_s16(
10019 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
10020 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10021 // CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
10022 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
10023 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
10024 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]]) #8
10025 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
10026 // CHECK:   ret <8 x i16> [[VQDMULHQ_V2_I]]
test_vqdmulhq_lane_s16(int16x8_t a,int16x4_t b)10027 int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
10028   return vqdmulhq_lane_s16(a, b, 3);
10029 }
10030 
10031 // CHECK-LABEL: @test_vqdmulhq_lane_s32(
10032 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
10033 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10034 // CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
10035 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
10036 // CHECK:   [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
10037 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]]) #8
10038 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
10039 // CHECK:   ret <4 x i32> [[VQDMULHQ_V2_I]]
test_vqdmulhq_lane_s32(int32x4_t a,int32x2_t b)10040 int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
10041   return vqdmulhq_lane_s32(a, b, 1);
10042 }
10043 
// CHECK-LABEL: @test_vqdmulh_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQDMULH_V5_I]]
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqdmulh_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulh_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQDMULH_V3_I]]
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqdmulh_n_s32(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK:   [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK:   [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQDMULHQ_V9_I]]
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqdmulhq_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK:   [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK:   [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULHQ_V5_I]]
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqdmulhq_n_s32(a, b);
}

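// vqdmull widens to double-width lanes: roughly sat(2 * (wide)a[i] * b[i])
// per lane (an illustrative sketch, not checked here). The doubled product
// can only saturate when both operands are the minimum negative value.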
// CHECK-LABEL: @test_vqdmull_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

// CHECK-LABEL: @test_vqdmull_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqdmull_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqdmull_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V5_I]]
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
  return vqdmull_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V3_I]]
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
  return vqdmull_n_s32(a, b);
}

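// vqmovn narrows each lane to half width, saturating to the range of the
// destination type (signed-to-signed or unsigned-to-unsigned).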
// CHECK-LABEL: @test_vqmovn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a)
// CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
int8x8_t test_vqmovn_s16(int16x8_t a) {
  return vqmovn_s16(a);
}

// CHECK-LABEL: @test_vqmovn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQMOVN_V1_I]]
int16x4_t test_vqmovn_s32(int32x4_t a) {
  return vqmovn_s32(a);
}

// CHECK-LABEL: @test_vqmovn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQMOVN_V1_I]]
int32x2_t test_vqmovn_s64(int64x2_t a) {
  return vqmovn_s64(a);
}

// CHECK-LABEL: @test_vqmovn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a)
// CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
  return vqmovn_u16(a);
}

// CHECK-LABEL: @test_vqmovn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQMOVN_V1_I]]
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
  return vqmovn_u32(a);
}

// CHECK-LABEL: @test_vqmovn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQMOVN_V1_I]]
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
  return vqmovn_u64(a);
}

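// vqmovun narrows signed input to an unsigned result, clamping negative
// lanes to 0 and lanes above the unsigned maximum to that maximum.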
// CHECK-LABEL: @test_vqmovun_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a)
// CHECK:   ret <8 x i8> [[VQMOVUN_V1_I]]
uint8x8_t test_vqmovun_s16(int16x8_t a) {
  return vqmovun_s16(a);
}

// CHECK-LABEL: @test_vqmovun_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a)
// CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQMOVUN_V1_I]]
uint16x4_t test_vqmovun_s32(int32x4_t a) {
  return vqmovun_s32(a);
}

// CHECK-LABEL: @test_vqmovun_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a)
// CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQMOVUN_V1_I]]
uint32x2_t test_vqmovun_s64(int64x2_t a) {
  return vqmovun_s64(a);
}

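// vqneg is a saturating negate: the minimum value of the lane type maps
// to the maximum instead of wrapping back to itself.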
// CHECK-LABEL: @test_vqneg_s8(
// CHECK:   [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VQNEG_V_I]]
int8x8_t test_vqneg_s8(int8x8_t a) {
  return vqneg_s8(a);
}

// CHECK-LABEL: @test_vqneg_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a)
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQNEG_V1_I]]
int16x4_t test_vqneg_s16(int16x4_t a) {
  return vqneg_s16(a);
}

// CHECK-LABEL: @test_vqneg_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a)
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQNEG_V1_I]]
int32x2_t test_vqneg_s32(int32x2_t a) {
  return vqneg_s32(a);
}

// CHECK-LABEL: @test_vqnegq_s8(
// CHECK:   [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VQNEGQ_V_I]]
int8x16_t test_vqnegq_s8(int8x16_t a) {
  return vqnegq_s8(a);
}

// CHECK-LABEL: @test_vqnegq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a)
// CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQNEGQ_V1_I]]
int16x8_t test_vqnegq_s16(int16x8_t a) {
  return vqnegq_s16(a);
}

// CHECK-LABEL: @test_vqnegq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a)
// CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQNEGQ_V1_I]]
int32x4_t test_vqnegq_s32(int32x4_t a) {
  return vqnegq_s32(a);
}

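// vqrdmulh is the rounding variant of vqdmulh. Illustrative per-s16-lane
// semantics (a sketch, not part of the checked output):
// sat16((2 * (int32_t)a[i] * b[i] + (1 << 15)) >> 16).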
// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]]) #8
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrdmulh_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]]) #8
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqrdmulhq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]]) #8
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqrdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrdmulhq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]]) #8
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqrdmulhq_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqrdmulh_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRDMULH_V5_I]]
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqrdmulh_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRDMULH_V3_I]]
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqrdmulh_n_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK:   [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRDMULHQ_V9_I]]
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqrdmulhq_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK:   [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRDMULHQ_V5_I]]
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqrdmulhq_n_s32(a, b);
}

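// vqrshl shifts each lane by a signed per-lane amount from the second
// operand: positive amounts shift left with saturation, negative amounts
// shift right with rounding. Note that the unsigned variants still take
// a signed shift vector.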
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

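// For the _n narrowing shifts below, the right-shift immediate is encoded
// as a splat of its negation (shifting right by 1 becomes <i16 -1, ...>),
// because the underlying vqrshiftn* intrinsics implement left-shift
// semantics and treat negative amounts as right shifts.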
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}

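// vqshl behaves like vqrshl but without rounding on negative (right)
// shift amounts; only the left-shift direction can saturate.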
// CHECK-LABEL: @test_vqshl_s8(
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: @test_vqshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: @test_vqshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: @test_vqshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: @test_vqshl_u8(
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: @test_vqshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: @test_vqshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: @test_vqshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: @test_vqshlq_s8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

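// vqshlu_n shifts signed input left by an immediate and saturates to the
// unsigned range of the same lane width, so negative lanes clamp to 0.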
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 1);
}

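// vqshl_n reuses the vqshifts/vqshiftu intrinsics with the immediate
// splatted into a constant vector, as the <i8 1, ...> operands below show.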
// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 1);
}

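// vqshrn_n/vqshrun_n are the non-rounding narrowing shifts; as with the
// rounding forms above, the right-shift immediate appears as a splat of
// -1 in the intrinsic call.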
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 1);
}

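// vqshrun_n: saturating shift right and narrow from a signed input to an
// unsigned result, via the vqshiftnsu intrinsic (same negated-shift encoding).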
// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 1);
}

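// vqsub: saturating subtract. Unlike the shifts above, these lower to the
// target-independent @llvm.ssub.sat / @llvm.usub.sat intrinsics rather than
// ARM-specific ones.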
// CHECK-LABEL: @test_vqsub_s8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: @test_vqsubq_s8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

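// vraddhn: rounding add and narrow, returning the high half of each lane.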
// CHECK-LABEL: @test_vraddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

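// vrecpe/vrecps: reciprocal estimate and the Newton-Raphson refinement step;
// the estimate is also available on unsigned 32-bit vectors.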
// CHECK-LABEL: @test_vrecpe_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a)
// CHECK:   ret <2 x float> [[VRECPE_V1_I]]
float32x2_t test_vrecpe_f32(float32x2_t a) {
  return vrecpe_f32(a);
}

// CHECK-LABEL: @test_vrecpe_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VRECPE_V1_I]]
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
  return vrecpe_u32(a);
}

// CHECK-LABEL: @test_vrecpeq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a)
// CHECK:   ret <4 x float> [[VRECPEQ_V1_I]]
float32x4_t test_vrecpeq_f32(float32x4_t a) {
  return vrecpeq_f32(a);
}

// CHECK-LABEL: @test_vrecpeq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VRECPEQ_V1_I]]
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
  return vrecpeq_u32(a);
}

// CHECK-LABEL: @test_vrecps_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
  return vrecps_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
  return vrecpsq_f32(a, b);
}

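// vreinterpret: reinterpret the bits of one 64-bit vector type as another.
// Each variant is a plain IR bitcast, or a no-op when the two NEON types
// share the same IR representation (e.g. s8 <-> u8).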
// CHECK-LABEL: @test_vreinterpret_s8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u8(
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p8(
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK:   ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u64(
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s8(
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p8(
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p16(
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s32(
// CHECK:   ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s64(
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

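// vreinterpretq: the same bit-pattern reinterpretations for 128-bit vectors.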
// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK:   ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK:   ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

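// vrev16/vrev32/vrev64 reverse the order of the elements within each 16-,
// 32-, or 64-bit group of the vector; CodeGen emits this as a shufflevector
// with a constant reversal mask, as the CHECK lines below verify.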
// CHECK-LABEL: @test_vrev16_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
  return vrev16_s8(a);
}

// CHECK-LABEL: @test_vrev16_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
  return vrev16_u8(a);
}

// CHECK-LABEL: @test_vrev16_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
  return vrev16_p8(a);
}

// CHECK-LABEL: @test_vrev16q_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
  return vrev16q_s8(a);
}

// CHECK-LABEL: @test_vrev16q_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
  return vrev16q_u8(a);
}

// CHECK-LABEL: @test_vrev16q_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
  return vrev16q_p8(a);
}

// CHECK-LABEL: @test_vrev32_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
  return vrev32_s8(a);
}

// CHECK-LABEL: @test_vrev32_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
  return vrev32_s16(a);
}

// CHECK-LABEL: @test_vrev32_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
  return vrev32_u8(a);
}

// CHECK-LABEL: @test_vrev32_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
  return vrev32_u16(a);
}

// CHECK-LABEL: @test_vrev32_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
  return vrev32_p8(a);
}

// CHECK-LABEL: @test_vrev32_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
  return vrev32_p16(a);
}

// CHECK-LABEL: @test_vrev32q_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
  return vrev32q_s8(a);
}

// CHECK-LABEL: @test_vrev32q_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
  return vrev32q_s16(a);
}

// CHECK-LABEL: @test_vrev32q_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
  return vrev32q_u8(a);
}

// CHECK-LABEL: @test_vrev32q_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
  return vrev32q_u16(a);
}

// CHECK-LABEL: @test_vrev32q_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
  return vrev32q_p8(a);
}

// CHECK-LABEL: @test_vrev32q_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
  return vrev32q_p16(a);
}

// CHECK-LABEL: @test_vrev64_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
  return vrev64_s8(a);
}

// CHECK-LABEL: @test_vrev64_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
  return vrev64_s16(a);
}

// CHECK-LABEL: @test_vrev64_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
  return vrev64_s32(a);
}

// CHECK-LABEL: @test_vrev64_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
  return vrev64_u8(a);
}

// CHECK-LABEL: @test_vrev64_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
  return vrev64_u16(a);
}

// CHECK-LABEL: @test_vrev64_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
  return vrev64_u32(a);
}

// CHECK-LABEL: @test_vrev64_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
  return vrev64_p8(a);
}

// CHECK-LABEL: @test_vrev64_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
  return vrev64_p16(a);
}

// CHECK-LABEL: @test_vrev64_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
  return vrev64_f32(a);
}

// CHECK-LABEL: @test_vrev64q_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
  return vrev64q_s8(a);
}

// CHECK-LABEL: @test_vrev64q_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
  return vrev64q_s16(a);
}

// CHECK-LABEL: @test_vrev64q_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
  return vrev64q_s32(a);
}

// CHECK-LABEL: @test_vrev64q_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
  return vrev64q_u8(a);
}

// CHECK-LABEL: @test_vrev64q_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
  return vrev64q_u16(a);
}

// CHECK-LABEL: @test_vrev64q_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
  return vrev64q_u32(a);
}

// CHECK-LABEL: @test_vrev64q_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
  return vrev64q_p8(a);
}

// CHECK-LABEL: @test_vrev64q_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
  return vrev64q_p16(a);
}

// CHECK-LABEL: @test_vrev64q_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
  return vrev64q_f32(a);
}

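// vrhadd is the rounding halving add: each result element is
// (a[i] + b[i] + 1) >> 1, computed without intermediate overflow, and maps
// onto the llvm.arm.neon.vrhadds (signed) / vrhaddu (unsigned) intrinsics.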
13440 // CHECK-LABEL: @test_vrhadd_s8(
13441 // CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
13442 // CHECK:   ret <8 x i8> [[VRHADD_V_I]]
test_vrhadd_s8(int8x8_t a,int8x8_t b)13443 int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
13444   return vrhadd_s8(a, b);
13445 }
13446 
// CHECK-LABEL: @test_vrhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
  return vrhadd_s16(a, b);
}

// CHECK-LABEL: @test_vrhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
  return vrhadd_s32(a, b);
}

// CHECK-LABEL: @test_vrhadd_u8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vrhadd_u8(a, b);
}

// CHECK-LABEL: @test_vrhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vrhadd_u16(a, b);
}

// CHECK-LABEL: @test_vrhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vrhadd_u32(a, b);
}

// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
  return vrhaddq_s8(a, b);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
  return vrhaddq_s16(a, b);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
  return vrhaddq_s32(a, b);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vrhaddq_u8(a, b);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vrhaddq_u16(a, b);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vrhaddq_u32(a, b);
}

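// vrshl: rounding shift left. Each lane of the second operand is a signed
// shift count, and negative counts perform a rounding right shift. Lowers to
// @llvm.arm.neon.vrshifts (signed) or @llvm.arm.neon.vrshiftu (unsigned).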
// CHECK-LABEL: @test_vrshl_s8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: @test_vrshlq_s8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}

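// vrshrn_n: rounding shift right by an immediate, then narrow to half the
// element width. The IR below passes the immediate as a splat of -n to
// @llvm.arm.neon.vrshiftn, since a negative count means a right shift.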
// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 1);
}

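// vrshr_n: rounding shift right by an immediate n, adding 1 << (n-1) before
// the shift. Implemented as a rounding left shift by a splat of -n.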
// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 1);
}

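// vrsqrte: reciprocal square-root estimate (VRSQRTE), defined for both
// float and unsigned-integer lanes.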
// CHECK-LABEL: @test_vrsqrte_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a)
// CHECK:   ret <2 x float> [[VRSQRTE_V1_I]]
float32x2_t test_vrsqrte_f32(float32x2_t a) {
  return vrsqrte_f32(a);
}

// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a)
// CHECK:   ret <4 x float> [[VRSQRTEQ_V1_I]]
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
  return vrsqrteq_f32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

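// vrsqrts: reciprocal square-root step, computing (3 - a*b) / 2 per lane;
// typically used to refine a vrsqrte estimate via Newton-Raphson iteration.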
// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
  return vrsqrts_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
  return vrsqrtsq_f32(a, b);
}

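// vrsra_n: rounding shift right by an immediate and accumulate. The IR is a
// rounding shift of %b (by a splat of -n) followed by a plain add with %a.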
// CHECK-LABEL: @test_vrsra_n_s8(
// CHECK:   [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK:   ret <8 x i8> [[VRSRA_N]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i16> [[VRSRA_N]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i32> [[VRSRA_N]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <1 x i64> [[VRSRA_N]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u8(
// CHECK:   [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK:   ret <8 x i8> [[VRSRA_N]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i16> [[VRSRA_N]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i32> [[VRSRA_N]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <1 x i64> [[VRSRA_N]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s8(
// CHECK:   [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK:   ret <16 x i8> [[VRSRA_N]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <8 x i16> [[VRSRA_N]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i32> [[VRSRA_N]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i64> [[VRSRA_N]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u8(
// CHECK:   [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK:   ret <16 x i8> [[VRSRA_N]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <8 x i16> [[VRSRA_N]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i32> [[VRSRA_N]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i64> [[VRSRA_N]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 1);
}

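// vrsubhn: rounding subtract and narrow, returning the most significant half
// of each (a - b) difference after adding the rounding constant.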
// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

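// vset_lane: insert a scalar into one lane of a vector; lowers directly to an
// IR insertelement. The f16 variants instead go through memory to reinterpret
// half as i16, and the bitcast pointer accesses keep mem2reg from promoting
// those temporaries.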
// CHECK-LABEL: @test_vset_lane_u8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_u16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_u32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_s8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_s16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_s32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_p8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_p16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_f32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
// CHECK:   ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_f16(
// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1
// CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK:   ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 1);
}

// CHECK-LABEL: @test_vsetq_lane_u8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_u16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_u32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_s8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_s16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_s32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_p8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_p16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_f32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
// CHECK:   ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_f16(
// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3
// CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK:   ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_s64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vset_lane_u64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vsetq_lane_s64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsetq_lane_u64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}

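// vshl: shift left by per-lane signed counts; negative counts shift right
// without rounding. Lowers to @llvm.arm.neon.vshifts / @llvm.arm.neon.vshiftu.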
// CHECK-LABEL: @test_vshl_s8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: @test_vshlq_s8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}

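// vshll_n: shift left long. Widens each lane (sext for signed, zext for
// unsigned) and then shifts with a plain IR shl; no NEON intrinsic is needed.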
// CHECK-LABEL: @test_vshll_n_s8(
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 1);
}

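// vshl_n_X (and the vshlq_n_X tests further down) shift by a compile-time
// constant, so no NEON intrinsic is needed: the checks expect a plain IR shl
// by a splat vector. The round-trip bitcasts through <8 x i8>/<16 x i8>
// appear to be an artifact of how clang emits the generic NEON builtins and
// are harmless here.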
// CHECK-LABEL: @test_vshl_n_s8(
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u8(
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 1);
}

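// vshrn_n_X narrows after shifting: an arithmetic (signed) or logical
// (unsigned) shift right by the immediate, then a trunc to the half-width
// element type. For example, vshrn_n_s16(a, 1) yields (a >> 1) truncated
// from i16 to i8 in each lane.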
// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 1);
}

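// vshr_n_X/vshrq_n_X are plain per-lane shifts right by an immediate: the
// checks expect ashr for the signed variants and lshr for the unsigned ones.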
// CHECK-LABEL: @test_vshr_n_s8(
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u8(
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK:   [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 1);
}

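// VSLI (shift left and insert) has no generic IR equivalent, so
// vsli_n_X/vsliq_n_X are expected to lower to @llvm.arm.neon.vshiftins.*
// with the shift amount passed as a splat of the (positive) immediate.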
// CHECK-LABEL: @test_vsli_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 1);
}

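// vsra_n_X/vsraq_n_X accumulate a shifted value: the checks expect the shift
// (ashr/lshr) followed by an add into the first operand, e.g.
// vsra_n_u8(acc, x, 1) computes acc + (x >> 1) in each lane.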
// CHECK-LABEL: @test_vsra_n_s8(
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u8(
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s8(
// CHECK:   [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u8(
// CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 1);
}

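// VSRI (shift right and insert) reuses the same @llvm.arm.neon.vshiftins
// intrinsic as VSLI; the right shift appears to be encoded by negating the
// immediate, hence the splats of -1 below for a shift count of 1.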
// CHECK-LABEL: @test_vsri_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 1);
}

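// vst1/vst1q store a whole D or Q register through @llvm.arm.neon.vst1.*,
// which carries an explicit alignment operand as its last argument. Note
// that the 64-bit element stores below use alignment 4, which appears
// consistent with the 4-byte alignment of 64-bit types under the apcs-gnu
// ABI this test targets.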
15470 // CHECK-LABEL: @test_vst1q_u8(
15471 // CHECK:   call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
15472 // CHECK:   ret void
test_vst1q_u8(uint8_t * a,uint8x16_t b)15473 void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
15474   vst1q_u8(a, b);
15475 }
15476 
15477 // CHECK-LABEL: @test_vst1q_u16(
15478 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
15479 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15480 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15481 // CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
15482 // CHECK:   ret void
test_vst1q_u16(uint16_t * a,uint16x8_t b)15483 void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
15484   vst1q_u16(a, b);
15485 }
15486 
15487 // CHECK-LABEL: @test_vst1q_u32(
15488 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
15489 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15490 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15491 // CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
15492 // CHECK:   ret void
test_vst1q_u32(uint32_t * a,uint32x4_t b)15493 void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
15494   vst1q_u32(a, b);
15495 }
15496 
15497 // CHECK-LABEL: @test_vst1q_u64(
15498 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
15499 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15500 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15501 // CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
15502 // CHECK:   ret void
test_vst1q_u64(uint64_t * a,uint64x2_t b)15503 void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
15504   vst1q_u64(a, b);
15505 }
15506 
15507 // CHECK-LABEL: @test_vst1q_s8(
15508 // CHECK:   call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
15509 // CHECK:   ret void
test_vst1q_s8(int8_t * a,int8x16_t b)15510 void test_vst1q_s8(int8_t * a, int8x16_t b) {
15511   vst1q_s8(a, b);
15512 }
15513 
15514 // CHECK-LABEL: @test_vst1q_s16(
15515 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
15516 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15517 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15518 // CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
15519 // CHECK:   ret void
test_vst1q_s16(int16_t * a,int16x8_t b)15520 void test_vst1q_s16(int16_t * a, int16x8_t b) {
15521   vst1q_s16(a, b);
15522 }
15523 
// CHECK-LABEL: @test_vst1q_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_s32(int32_t * a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: @test_vst1q_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_s64(int64_t * a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: @test_vst1q_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* [[TMP0]], <8 x half> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1q_f16(float16_t * a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: @test_vst1q_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* [[TMP0]], <4 x float> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_f32(float32_t * a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: @test_vst1q_p8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: @test_vst1q_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
  vst1q_p16(a, b);
}

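// A minimal usage sketch (not exercised by any CHECK lines in this file):
// vld1q_u32/vst1q_u32 move a full q register, so a copy loop can process
// four 32-bit lanes per iteration. The helper name is illustrative only.
void copy_u32x4(uint32_t *dst, const uint32_t *src, int quads) {
  for (int i = 0; i != quads; ++i)
    vst1q_u32(dst + 4 * i, vld1q_u32(src + 4 * i));
}
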
// CHECK-LABEL: @test_vst1_u8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: @test_vst1_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: @test_vst1_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: @test_vst1_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
  vst1_u64(a, b);
}

// CHECK-LABEL: @test_vst1_s8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1_s8(int8_t * a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: @test_vst1_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_s16(int16_t * a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: @test_vst1_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_s32(int32_t * a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: @test_vst1_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_s64(int64_t * a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: @test_vst1_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* [[TMP0]], <4 x half> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_f16(float16_t * a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: @test_vst1_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* [[TMP0]], <2 x float> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_f32(float32_t * a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: @test_vst1_p8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: @test_vst1_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
  vst1_p16(a, b);
}

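// The _lane stores that follow lower to a plain extractelement plus a
// scalar store for 8-, 16-, and 32-bit elements; the 64-bit q-register
// forms instead keep the vst1 intrinsic, peeling the requested lane off
// with a shufflevector to <1 x i64>.
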
// CHECK-LABEL: @test_vst1q_lane_u8(
// CHECK:   [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
  vst1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vst1q_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
  vst1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst1q_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
  vst1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst1q_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK:   ret void
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
  vst1q_lane_u64(a, b, 1);
}

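// Note the i32 4 operand above: under the apcs-gnu ABI 64-bit types are
// only 4-byte aligned, so the lane store advertises an alignment of 4
// rather than the 8-byte element size.
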
// CHECK-LABEL: @test_vst1q_lane_s8(
// CHECK:   [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
  vst1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vst1q_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
  vst1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst1q_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
  vst1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst1q_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK:   ret void
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
  vst1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vst1q_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   store half [[TMP3]], half* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
  vst1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst1q_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   store float [[TMP3]], float* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
  vst1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst1q_lane_p8(
// CHECK:   [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
  vst1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vst1q_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
  vst1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_u8(
// CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
  vst1_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
  vst1_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vst1_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
  vst1_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vst1_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
  vst1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vst1_lane_s8(
// CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
  vst1_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
  vst1_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst1_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
  vst1_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst1_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
  vst1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vst1_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   store half [[TMP3]], half* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
  vst1_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst1_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   store float [[TMP3]], float* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
  vst1_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst1_lane_p8(
// CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
  vst1_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
  vst1_lane_p16(a, b, 3);
}

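// The vst2 tests pass a NEON structure type (e.g. uint8x16x2_t) by value.
// The aggregate argument is coerced to an integer array ([4 x i64] for
// q-register pairs, [2 x i64] for d-register pairs), so each function first
// materializes the argument with an alloca and a coercion store, memcpys it
// into the header's local __s1 copy, and only then loads the two vectors
// that feed @llvm.arm.neon.vst2.
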
// CHECK-LABEL: @test_vst2q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}

// CHECK-LABEL: @test_vst2q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}

// CHECK-LABEL: @test_vst2q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}

// CHECK-LABEL: @test_vst2q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
  vst2q_s8(a, b);
}

// CHECK-LABEL: @test_vst2q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
  vst2q_s16(a, b);
}

// CHECK-LABEL: @test_vst2q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
  vst2q_s32(a, b);
}

// CHECK-LABEL: @test_vst2q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
  vst2q_f16(a, b);
}

// CHECK-LABEL: @test_vst2q_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
  vst2q_f32(a, b);
}

// CHECK-LABEL: @test_vst2q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}

// CHECK-LABEL: @test_vst2q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}

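// A minimal usage sketch (not exercised by any CHECK lines in this file):
// vst2 interleaves its two registers on the way to memory, turning a
// structure-of-arrays pair back into array-of-structures layout. The helper
// name is illustrative only.
void interleave2_u8(uint8_t *dst, uint8x8_t even, uint8x8_t odd) {
  uint8x8x2_t pair = { { even, odd } };
  // dst[0] = even[0], dst[1] = odd[0], dst[2] = even[1], ...
  vst2_u8(dst, pair);
}
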
16199 // CHECK-LABEL: @test_vst2_u8(
16200 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
16201 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
16202 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
16203 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16204 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16205 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
16206 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
16207 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16208 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16209 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16210 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16211 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
16212 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16213 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16214 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16215 // CHECK:   ret void
test_vst2_u8(uint8_t * a,uint8x8x2_t b)16216 void test_vst2_u8(uint8_t * a, uint8x8x2_t b) {
16217   vst2_u8(a, b);
16218 }
16219 
16220 // CHECK-LABEL: @test_vst2_u16(
16221 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
16222 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
16223 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
16224 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16225 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16226 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
16227 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
16228 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16229 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
16230 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16231 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16232 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16233 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16234 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16235 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16236 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16237 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16238 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16239 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16240 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16241 // CHECK:   ret void
test_vst2_u16(uint16_t * a,uint16x4x2_t b)16242 void test_vst2_u16(uint16_t * a, uint16x4x2_t b) {
16243   vst2_u16(a, b);
16244 }
16245 
16246 // CHECK-LABEL: @test_vst2_u32(
16247 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
16248 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
16249 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
16250 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16251 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16252 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
16253 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
16254 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16255 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
16256 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16257 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16258 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16259 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16260 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16261 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16262 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16263 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16264 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16265 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16266 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16267 // CHECK:   ret void
test_vst2_u32(uint32_t * a,uint32x2x2_t b)16268 void test_vst2_u32(uint32_t * a, uint32x2x2_t b) {
16269   vst2_u32(a, b);
16270 }
16271 
16272 // CHECK-LABEL: @test_vst2_u64(
16273 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
16274 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
16275 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
16276 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16277 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16278 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
16279 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
16280 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16281 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
16282 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16283 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16284 // CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16285 // CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16286 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16287 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16288 // CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16289 // CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16290 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16291 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16292 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16293 // CHECK:   ret void
test_vst2_u64(uint64_t * a,uint64x1x2_t b)16294 void test_vst2_u64(uint64_t * a, uint64x1x2_t b) {
16295   vst2_u64(a, b);
16296 }
16297 
16298 // CHECK-LABEL: @test_vst2_s8(
16299 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
16300 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
16301 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
16302 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16303 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16304 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
16305 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
16306 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16307 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16308 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16309 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16310 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16311 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16312 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16313 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16314 // CHECK:   ret void
test_vst2_s8(int8_t * a,int8x8x2_t b)16315 void test_vst2_s8(int8_t * a, int8x8x2_t b) {
16316   vst2_s8(a, b);
16317 }
16318 
16319 // CHECK-LABEL: @test_vst2_s16(
16320 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
16321 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
16322 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
16323 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16324 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16325 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
16326 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
16327 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16328 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
16329 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16330 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16331 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16332 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16333 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16334 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16335 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16336 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16337 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16338 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16339 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16340 // CHECK:   ret void
test_vst2_s16(int16_t * a,int16x4x2_t b)16341 void test_vst2_s16(int16_t * a, int16x4x2_t b) {
16342   vst2_s16(a, b);
16343 }
16344 
16345 // CHECK-LABEL: @test_vst2_s32(
16346 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
16347 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
16348 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
16349 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16350 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16351 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
16352 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
16353 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16354 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
16355 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16356 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16357 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16358 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16359 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16360 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16361 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16362 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16363 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16364 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16365 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16366 // CHECK:   ret void
test_vst2_s32(int32_t * a,int32x2x2_t b)16367 void test_vst2_s32(int32_t * a, int32x2x2_t b) {
16368   vst2_s32(a, b);
16369 }
16370 
16371 // CHECK-LABEL: @test_vst2_s64(
16372 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
16373 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
16374 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
16375 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16376 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16377 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
16378 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
16379 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16380 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
16381 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16382 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16383 // CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16384 // CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16385 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16386 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16387 // CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16388 // CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16389 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16390 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16391 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16392 // CHECK:   ret void
test_vst2_s64(int64_t * a,int64x1x2_t b)16393 void test_vst2_s64(int64_t * a, int64x1x2_t b) {
16394   vst2_s64(a, b);
16395 }
16396 
16397 // CHECK-LABEL: @test_vst2_f16(
16398 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
16399 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
16400 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
16401 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
16402 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16403 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
16404 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
16405 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16406 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
16407 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16408 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
16409 // CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16410 // CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
16411 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16412 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
16413 // CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16414 // CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
16415 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
16416 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
16417 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 2)
16418 // CHECK:   ret void
test_vst2_f16(float16_t * a,float16x4x2_t b)16419 void test_vst2_f16(float16_t * a, float16x4x2_t b) {
16420   vst2_f16(a, b);
16421 }

// CHECK-LABEL: @test_vst2_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2_f32(float32_t * a, float32x2x2_t b) {
  vst2_f32(a, b);
}

// CHECK-LABEL: @test_vst2_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: @test_vst2_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

// CHECK-LABEL: @test_vst2q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_lane_u16(a, b, 7);
}
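
// Note: for the vst2lane intrinsic the final two i32 operands are the lane
// index and the element alignment in bytes; "i32 7, i32 2" above stores
// lane 7 of 16-bit elements.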

// CHECK-LABEL: @test_vst2q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK:   ret void
void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst2q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
  vst2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst2q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK:   ret void
void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
  vst2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst2q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
  vst2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst2q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
// CHECK:   ret void
void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
  vst2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst2q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vst2_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK:   ret void
void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_lane_u8(a, b, 7);
}
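
// Note: for 8-bit element types the pointer argument is already i8*, so %a
// is passed straight to the intrinsic with no bitcast, unlike the wider
// element types.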

// CHECK-LABEL: @test_vst2_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vst2_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK:   ret void
void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vst2_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK:   ret void
void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
  vst2_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst2_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
  vst2_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst2_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK:   ret void
void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
  vst2_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst2_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
  vst2_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst2_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
// CHECK:   ret void
void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
  vst2_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst2_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK:   ret void
void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst2_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vst3q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}
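
// Note: the vst3q variants pass three data vectors plus the trailing
// alignment operand; the [6 x i64] coercion and the 48-byte memcpy reflect
// the three 16-byte q-register elements of the struct.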

// CHECK-LABEL: @test_vst3q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}

// CHECK-LABEL: @test_vst3q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}

// CHECK-LABEL: @test_vst3q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
  vst3q_s8(a, b);
}

// CHECK-LABEL: @test_vst3q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
  vst3q_s16(a, b);
}

// CHECK-LABEL: @test_vst3q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
  vst3q_s32(a, b);
}

// CHECK-LABEL: @test_vst3q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
  vst3q_f16(a, b);
}

// CHECK-LABEL: @test_vst3q_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

// CHECK-LABEL: @test_vst3q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

// CHECK-LABEL: @test_vst3q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17198 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17199 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17200 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17201 // CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17202 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17203 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17204 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17205 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17206 // CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17207 // CHECK:   ret void
test_vst3q_p16(poly16_t * a,poly16x8x3_t b)17208 void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
17209   vst3q_p16(a, b);
17210 }
17211 
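// Note: the 64-bit (d-register) vst3 variants below differ from the q-register
// forms above only in the ABI coercion and alignment visible in the IR: the
// three vectors are passed coerced as [3 x i64] (24 bytes, align 8) rather
// than [6 x i64] (48 bytes, align 16).
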
// CHECK-LABEL: @test_vst3_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

// CHECK-LABEL: @test_vst3_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

// CHECK-LABEL: @test_vst3_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

// CHECK-LABEL: @test_vst3_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

// CHECK-LABEL: @test_vst3_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3_s8(int8_t * a, int8x8x3_t b) {
  vst3_s8(a, b);
}

// CHECK-LABEL: @test_vst3_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_s16(int16_t * a, int16x4x3_t b) {
  vst3_s16(a, b);
}

// CHECK-LABEL: @test_vst3_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_s32(int32_t * a, int32x2x3_t b) {
  vst3_s32(a, b);
}

// CHECK-LABEL: @test_vst3_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_s64(int64_t * a, int64x1x3_t b) {
  vst3_s64(a, b);
}

// CHECK-LABEL: @test_vst3_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_f16(float16_t * a, float16x4x3_t b) {
  vst3_f16(a, b);
}

// CHECK-LABEL: @test_vst3_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_f32(float32_t * a, float32x2x3_t b) {
  vst3_f32(a, b);
}

// CHECK-LABEL: @test_vst3_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

// CHECK-LABEL: @test_vst3_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_p16(a, b);
}

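// Note: the vst3*_lane tests below store a single lane from each of the three
// source vectors via @llvm.arm.neon.vst3lane; the constant lane index supplied
// to the intrinsic (e.g. 7 for an <8 x i16> vector) appears as the penultimate
// i32 operand, ahead of the trailing alignment operand.
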
17563 // CHECK-LABEL: @test_vst3q_lane_u16(
17564 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
17565 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
17566 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
17567 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17568 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17569 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
17570 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
17571 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17572 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17573 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17574 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17575 // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17576 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17577 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17578 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17579 // CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17580 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17581 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17582 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17583 // CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17584 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17585 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17586 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17587 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17588 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
17589 // CHECK:   ret void
test_vst3q_lane_u16(uint16_t * a,uint16x8x3_t b)17590 void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
17591   vst3q_lane_u16(a, b, 7);
17592 }
17593 
17594 // CHECK-LABEL: @test_vst3q_lane_u32(
17595 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
17596 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
17597 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
17598 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17599 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17600 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
17601 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
17602 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17603 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
17604 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17605 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17606 // CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17607 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17608 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17609 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17610 // CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17611 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17612 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17613 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17614 // CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17615 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17616 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17617 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17618 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17619 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
17620 // CHECK:   ret void
test_vst3q_lane_u32(uint32_t * a,uint32x4x3_t b)17621 void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
17622   vst3q_lane_u32(a, b, 3);
17623 }
17624 
17625 // CHECK-LABEL: @test_vst3q_lane_s16(
17626 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
17627 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
17628 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
17629 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17630 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17631 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
17632 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
17633 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17634 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17635 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17636 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17637 // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17638 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17639 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17640 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17641 // CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17642 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17643 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17644 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17645 // CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17646 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17647 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17648 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17649 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17650 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
17651 // CHECK:   ret void
test_vst3q_lane_s16(int16_t * a,int16x8x3_t b)17652 void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
17653   vst3q_lane_s16(a, b, 7);
17654 }
17655 
17656 // CHECK-LABEL: @test_vst3q_lane_s32(
17657 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
17658 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
17659 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
17660 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17661 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17662 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
17663 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
17664 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17665 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
17666 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17667 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17668 // CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17669 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17670 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17671 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17672 // CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17673 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17674 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17675 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17676 // CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17677 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17678 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17679 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17680 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17681 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
17682 // CHECK:   ret void
test_vst3q_lane_s32(int32_t * a,int32x4x3_t b)17683 void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
17684   vst3q_lane_s32(a, b, 3);
17685 }
17686 
17687 // CHECK-LABEL: @test_vst3q_lane_f16(
17688 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
17689 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
17690 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
17691 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
17692 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17693 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
17694 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
17695 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17696 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
17697 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17698 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
17699 // CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
17700 // CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
17701 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17702 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
17703 // CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
17704 // CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
17705 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
17706 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
17707 // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
17708 // CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
17709 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
17710 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
17711 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
17712 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 7, i32 2)
17713 // CHECK:   ret void
test_vst3q_lane_f16(float16_t * a,float16x8x3_t b)17714 void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
17715   vst3q_lane_f16(a, b, 7);
17716 }
17717 
17718 // CHECK-LABEL: @test_vst3q_lane_f32(
17719 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
17720 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
17721 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
17722 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
17723 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17724 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
17725 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
17726 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17727 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
17728 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17729 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
17730 // CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
17731 // CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
17732 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17733 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
17734 // CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
17735 // CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
17736 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
17737 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
17738 // CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
17739 // CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
17740 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
17741 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
17742 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
17743 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4)
17744 // CHECK:   ret void
test_vst3q_lane_f32(float32_t * a,float32x4x3_t b)17745 void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) {
17746   vst3q_lane_f32(a, b, 3);
17747 }
17748 
17749 // CHECK-LABEL: @test_vst3q_lane_p16(
17750 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
17751 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
17752 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
17753 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17754 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17755 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
17756 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
17757 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17758 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17759 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17760 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17761 // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17762 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17763 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17764 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17765 // CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17766 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17767 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
17768 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17769 // CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17770 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17771 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17772 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17773 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17774 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
17775 // CHECK:   ret void
test_vst3q_lane_p16(poly16_t * a,poly16x8x3_t b)17776 void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) {
17777   vst3q_lane_p16(a, b, 7);
17778 }
17779 
17780 // CHECK-LABEL: @test_vst3_lane_u8(
17781 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
17782 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
17783 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
17784 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
17785 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17786 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
17787 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
17788 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17789 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17790 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
17791 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17792 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17793 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
17794 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17795 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
17796 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
17797 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17798 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
17799 // CHECK:   ret void
test_vst3_lane_u8(uint8_t * a,uint8x8x3_t b)17800 void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) {
17801   vst3_lane_u8(a, b, 7);
17802 }
17803 
17804 // CHECK-LABEL: @test_vst3_lane_u16(
17805 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
17806 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
17807 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
17808 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
17809 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
17810 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
17811 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
17812 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
17813 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17814 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17815 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
17816 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17817 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
17818 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17819 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
17820 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17821 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
17822 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
17823 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
17824 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17825 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
17826 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
17827 // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
17828 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
17829 // CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
17830 // CHECK:   ret void
test_vst3_lane_u16(uint16_t * a,uint16x4x3_t b)17831 void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) {
17832   vst3_lane_u16(a, b, 3);
17833 }
17834 
// CHECK-LABEL: @test_vst3_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK:   ret void
void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK:   ret void
void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) {
  vst3_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK:   ret void
void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) {
  vst3_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst3_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK:   ret void
void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
  vst3_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 3, i32 2)
// CHECK:   ret void
void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
  vst3_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst3_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
// CHECK:   ret void
void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
  vst3_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK:   ret void
void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK:   ret void
void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vst4q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}

// CHECK-LABEL: @test_vst4q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

// CHECK-LABEL: @test_vst4q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

// CHECK-LABEL: @test_vst4q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

// CHECK-LABEL: @test_vst4q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

// CHECK-LABEL: @test_vst4q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: @test_vst4q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: @test_vst4q_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: @test_vst4q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: @test_vst4q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

// CHECK-LABEL: @test_vst4_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_u8(a, b);
}

// CHECK-LABEL: @test_vst4_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

// CHECK-LABEL: @test_vst4_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
  vst4_u32(a, b);
}

18501 // CHECK-LABEL: @test_vst4_u64(
18502 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
18503 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
18504 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
18505 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
18506 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18507 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
18508 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
18509 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18510 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
18511 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18512 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
18513 // CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
18514 // CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
18515 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18516 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
18517 // CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
18518 // CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
18519 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18520 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
18521 // CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
18522 // CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
18523 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
18524 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
18525 // CHECK:   [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
18526 // CHECK:   [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
18527 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
18528 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
18529 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
18530 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
18531 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
18532 // CHECK:   ret void
test_vst4_u64(uint64_t * a,uint64x1x4_t b)18533 void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
18534   vst4_u64(a, b);
18535 }
18536 
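// Note on the lowering pattern checked above (an observation, not an extra
// test): the by-value NEON struct argument arrives coerced to an i64 array,
// is spilled to [[B]], memcpy'd into the shadow alloca [[__S1]], and each of
// the four vectors is then reloaded and passed to @llvm.arm.neon.vst4.*. The
// trailing i32 operand of the intrinsic is the store alignment in bytes,
// taken from the pointee type; the u64 case above still passes i32 4,
// presumably reflecting the 4-byte natural alignment of 64-bit types under
// the ABI this test targets.
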
// CHECK-LABEL: @test_vst4_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4_s8(int8_t * a, int8x8x4_t b) {
  vst4_s8(a, b);
}

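// For 8-bit element types, as above, the destination pointer already has
// type i8*, so no pointer bitcast is emitted and the <8 x i8> payloads are
// passed straight through without a byte-vector round trip; the alignment
// operand is 1.
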
// CHECK-LABEL: @test_vst4_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4_s16(int16_t * a, int16x4x4_t b) {
  vst4_s16(a, b);
}

// CHECK-LABEL: @test_vst4_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4_s32(int32_t * a, int32x2x4_t b) {
  vst4_s32(a, b);
}

// CHECK-LABEL: @test_vst4_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4_s64(int64_t * a, int64x1x4_t b) {
  vst4_s64(a, b);
}

// CHECK-LABEL: @test_vst4_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4_f16(float16_t * a, float16x4x4_t b) {
  vst4_f16(a, b);
}

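// The paired bitcasts through <8 x i8> seen in the non-8-bit tests above
// (e.g. [[TMP5]]/[[TMP12]]) appear to come from Clang's type-generic NEON
// builtins, which take byte vectors: each operand is cast to <8 x i8> and
// immediately cast back before the intrinsic call. Only mem2reg is run over
// the output, so these round trips survive into the checks; a full
// optimization pipeline would presumably fold them away.
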
// CHECK-LABEL: @test_vst4_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4_f32(float32_t * a, float32x2x4_t b) {
  vst4_f32(a, b);
}

// CHECK-LABEL: @test_vst4_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_p8(a, b);
}

// CHECK-LABEL: @test_vst4_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_p16(a, b);
}

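// The _lane variants tested below map to @llvm.arm.neon.vst4lane.*, whose
// last two i32 operands are the (immediate) lane index and the alignment in
// bytes. A minimal usage sketch, with hypothetical buffers that are not part
// of this test:
//
//   uint16_t dst[4];                   // receives one element per vector
//   uint16x8x4_t v = vld4q_u16(src);   // src: const uint16_t[32]
//   vst4q_lane_u16(dst, v, 7);         // stores lane 7 of each of the four
//                                      // vectors; 7 is the highest valid
//                                      // lane for 8 x 16-bit elements
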
// CHECK-LABEL: @test_vst4q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst4q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK:   ret void
void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst4q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
  vst4q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst4q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK:   ret void
void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
  vst4q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst4q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
  vst4q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst4q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
// CHECK:   ret void
void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
  vst4q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst4q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_lane_p16(a, b, 7);
}

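// The 64-bit (d-register) _lane forms below follow the same shape; the
// immediate lane index ranges over the element count of the double-word
// vector (0-7 for u8/s8, 0-3 for u16/s16, 0-1 for u32/s32), and the 8-bit
// variants again pass %a directly with alignment 1.
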
19059 // CHECK-LABEL: @test_vst4_lane_u8(
19060 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
19061 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
19062 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
19063 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19064 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19065 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
19066 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
19067 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19068 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19069 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
19070 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19071 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19072 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19073 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19074 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19075 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19076 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19077 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19078 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
19079 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
19080 // CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
19081 // CHECK:   ret void
test_vst4_lane_u8(uint8_t * a,uint8x8x4_t b)19082 void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
19083   vst4_lane_u8(a, b, 7);
19084 }
19085 
19086 // CHECK-LABEL: @test_vst4_lane_u16(
19087 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
19088 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
19089 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
19090 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
19091 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19092 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
19093 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
19094 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19095 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
19096 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19097 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
19098 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19099 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19100 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19101 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19102 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19103 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19104 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19105 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19106 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19107 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19108 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19109 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
19110 // CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
19111 // CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
19112 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19113 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19114 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19115 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
19116 // CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
19117 // CHECK:   ret void
test_vst4_lane_u16(uint16_t * a,uint16x4x4_t b)19118 void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
19119   vst4_lane_u16(a, b, 3);
19120 }
19121 
19122 // CHECK-LABEL: @test_vst4_lane_u32(
19123 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
19124 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
19125 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
19126 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
19127 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19128 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
19129 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
19130 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19131 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
19132 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19133 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
19134 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19135 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19136 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19137 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19138 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19139 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19140 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19141 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
19142 // CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
19143 // CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
19144 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19145 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
19146 // CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
19147 // CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
19148 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19149 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19150 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
19151 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
19152 // CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
19153 // CHECK:   ret void
test_vst4_lane_u32(uint32_t * a,uint32x2x4_t b)19154 void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
19155   vst4_lane_u32(a, b, 1);
19156 }
19157 
19158 // CHECK-LABEL: @test_vst4_lane_s8(
19159 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
19160 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
19161 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
19162 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19163 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19164 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
19165 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
19166 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19167 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19168 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
19169 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19170 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19171 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19172 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19173 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19174 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19175 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19176 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
19177 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
19178 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
19179 // CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
19180 // CHECK:   ret void
test_vst4_lane_s8(int8_t * a,int8x8x4_t b)19181 void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
19182   vst4_lane_s8(a, b, 7);
19183 }

// CHECK-LABEL: @test_vst4_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK:   ret void
void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) {
  vst4_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst4_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
// CHECK:   ret void
void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
  vst4_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst4_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 3, i32 2)
// CHECK:   ret void
void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
  vst4_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst4_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4)
// CHECK:   ret void
void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) {
  vst4_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst4_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK:   ret void
void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst4_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK:   ret void
void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vsub_s8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) {
  return vsub_s8(a, b);
}

// CHECK-LABEL: @test_vsub_s16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) {
  return vsub_s16(a, b);
}

// CHECK-LABEL: @test_vsub_s32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) {
  return vsub_s32(a, b);
}

// CHECK-LABEL: @test_vsub_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) {
  return vsub_s64(a, b);
}

// CHECK-LABEL: @test_vsub_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, %b
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) {
  return vsub_f32(a, b);
}

// CHECK-LABEL: @test_vsub_u8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) {
  return vsub_u8(a, b);
}

// CHECK-LABEL: @test_vsub_u16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) {
  return vsub_u16(a, b);
}

// CHECK-LABEL: @test_vsub_u32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) {
  return vsub_u32(a, b);
}

// CHECK-LABEL: @test_vsub_u64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) {
  return vsub_u64(a, b);
}

// CHECK-LABEL: @test_vsubq_s8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) {
  return vsubq_s8(a, b);
}

// CHECK-LABEL: @test_vsubq_s16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) {
  return vsubq_s16(a, b);
}

// CHECK-LABEL: @test_vsubq_s32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) {
  return vsubq_s32(a, b);
}

// CHECK-LABEL: @test_vsubq_s64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) {
  return vsubq_s64(a, b);
}

// CHECK-LABEL: @test_vsubq_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, %b
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) {
  return vsubq_f32(a, b);
}

// CHECK-LABEL: @test_vsubq_u8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vsubq_u8(a, b);
}

// CHECK-LABEL: @test_vsubq_u16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vsubq_u16(a, b);
}

// CHECK-LABEL: @test_vsubq_u32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vsubq_u32(a, b);
}

// CHECK-LABEL: @test_vsubq_u64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vsubq_u64(a, b);
}

// CHECK-LABEL: @test_vsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}
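
// Per the IR above, each vsubhn_* is a plain wide subtract, a logical shift
// right by half the element width, and a truncate to the narrow type. A
// hypothetical one-lane sketch of the s16 variant (unused static inline, so
// it emits no IR and is not matched by any CHECK line):
static inline int8_t vsubhn_ref_s16(int16_t a, int16_t b) {
  return (int8_t)((uint16_t)(a - b) >> 8); // sub, lshr 8, trunc
}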

// CHECK-LABEL: @test_vsubl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}
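
// vsubl_* widens both operands first (sext for signed, zext for unsigned)
// and subtracts in the wide type; the vsubw_* tests below differ only in
// taking an already-wide first operand. A hypothetical one-lane sketch
// (unused static inline, so it emits no IR):
static inline int16_t vsubl_ref_s8(int8_t a, int8_t b) {
  return (int16_t)((int16_t)a - (int16_t)b); // sign-extend, subtract wide
}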

// CHECK-LABEL: @test_vsubw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

// CHECK-LABEL: @test_vtbl1_u8(
// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL1_I]]
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
  return vtbl1_u8(a, b);
}

// CHECK-LABEL: @test_vtbl1_s8(
// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL1_I]]
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
  return vtbl1_s8(a, b);
}

// CHECK-LABEL: @test_vtbl1_p8(
// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL1_I]]
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
  return vtbl1_p8(a, b);
}
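
// The vtbl1 tests map directly onto @llvm.arm.neon.vtbl1. Architecturally,
// VTBL selects table bytes by index and produces 0 for out-of-range indices;
// the vtbl2/3/4 variants below only widen the table to 16, 24, or 32 bytes.
// A hypothetical per-byte sketch of that selection rule (unused static
// inline, so it emits no IR):
static inline uint8_t vtbl_ref(const uint8_t *table, unsigned table_len,
                               uint8_t idx) {
  return idx < table_len ? table[idx] : 0; // out-of-range lanes become 0
}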

// CHECK-LABEL: @test_vtbl2_u8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL2_I]]
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  return vtbl2_u8(a, b);
}

// CHECK-LABEL: @test_vtbl2_s8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL2_I]]
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  return vtbl2_s8(a, b);
}

// CHECK-LABEL: @test_vtbl2_p8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL2_I]]
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  return vtbl2_p8(a, b);
}

// CHECK-LABEL: @test_vtbl3_u8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL3_I]]
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  return vtbl3_u8(a, b);
}

// CHECK-LABEL: @test_vtbl3_s8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL3_I]]
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  return vtbl3_s8(a, b);
}

// CHECK-LABEL: @test_vtbl3_p8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL3_I]]
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  return vtbl3_p8(a, b);
}

// CHECK-LABEL: @test_vtbl4_u8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL4_I]]
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  return vtbl4_u8(a, b);
}

// CHECK-LABEL: @test_vtbl4_s8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL4_I]]
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  return vtbl4_s8(a, b);
}

// CHECK-LABEL: @test_vtbl4_p8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL4_I]]
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  return vtbl4_p8(a, b);
}

// CHECK-LABEL: @test_vtbx1_u8(
// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX1_I]]
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vtbx1_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx1_s8(
// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX1_I]]
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vtbx1_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx1_p8(
// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX1_I]]
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  return vtbx1_p8(a, b, c);
}
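
// vtbx_* differs from vtbl_* only for out-of-range indices: instead of
// producing 0 it keeps the corresponding byte of the destination operand,
// which is why %a is threaded through the @llvm.arm.neon.vtbx* calls above.
// A hypothetical per-byte sketch (unused static inline, so it emits no IR):
static inline uint8_t vtbx_ref(uint8_t dst, const uint8_t *table,
                               unsigned table_len, uint8_t idx) {
  return idx < table_len ? table[idx] : dst; // out-of-range keeps dst byte
}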
19982 
// CHECK-LABEL: @test_vtbx2_u8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX2_I]]
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  return vtbx2_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx2_s8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX2_I]]
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  return vtbx2_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx2_p8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX2_I]]
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  return vtbx2_p8(a, b, c);
}

// CHECK-LABEL: @test_vtbx3_u8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX3_I]]
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx3_s8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX3_I]]
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  return vtbx3_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx3_p8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX3_I]]
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  return vtbx3_p8(a, b, c);
}

// CHECK-LABEL: @test_vtbx4_u8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX4_I]]
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
  return vtbx4_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx4_s8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX4_I]]
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
  return vtbx4_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx4_p8(
// CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VTBX4_I]]
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
  return vtbx4_p8(a, b, c);
}

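// vtrn/vtrnq return a pair of vectors, so the result comes back through an
// sret pointer rather than in registers: each test checks two shufflevector
// operations (even transposition lanes, then odd) whose results are stored
// into the two halves of the returned struct, tagged with !alias.scope
// metadata.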
// CHECK: @test_vtrn_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !3
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !3
// CHECK:   ret void
int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
  return vtrn_s8(a, b);
}

// CHECK: @test_vtrn_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !6
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !6
// CHECK:   ret void
int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
  return vtrn_s16(a, b);
}

// CHECK: @test_vtrn_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !9
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !9
// CHECK:   ret void
int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
  return vtrn_s32(a, b);
}

// CHECK: @test_vtrn_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !12
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !12
// CHECK:   ret void
uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
  return vtrn_u8(a, b);
}

// CHECK: @test_vtrn_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !15
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !15
// CHECK:   ret void
uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
  return vtrn_u16(a, b);
}

// CHECK: @test_vtrn_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !18
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !18
// CHECK:   ret void
uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
  return vtrn_u32(a, b);
}

// CHECK: @test_vtrn_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !21
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !21
// CHECK:   ret void
float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
  return vtrn_f32(a, b);
}

// CHECK: @test_vtrn_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !24
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !24
// CHECK:   ret void
poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
  return vtrn_p8(a, b);
}

// CHECK: @test_vtrn_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !27
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !27
// CHECK:   ret void
poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
  return vtrn_p16(a, b);
}

// CHECK: @test_vtrnq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !30
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !30
// CHECK:   ret void
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
  return vtrnq_s8(a, b);
}

// CHECK: @test_vtrnq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !33
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !33
// CHECK:   ret void
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
  return vtrnq_s16(a, b);
}

// CHECK: @test_vtrnq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !36
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !36
// CHECK:   ret void
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
  return vtrnq_s32(a, b);
}

// CHECK: @test_vtrnq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !39
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !39
// CHECK:   ret void
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
  return vtrnq_u8(a, b);
}

// CHECK: @test_vtrnq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !42
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !42
// CHECK:   ret void
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
  return vtrnq_u16(a, b);
}

// CHECK: @test_vtrnq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !45
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !45
// CHECK:   ret void
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
  return vtrnq_u32(a, b);
}

// CHECK: @test_vtrnq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK:   store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !48
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !48
// CHECK:   ret void
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
  return vtrnq_f32(a, b);
}

// CHECK: @test_vtrnq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !51
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !51
// CHECK:   ret void
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
  return vtrnq_p8(a, b);
}

// CHECK: @test_vtrnq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !54
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !54
// CHECK:   ret void
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
  return vtrnq_p16(a, b);
}

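// vtst/vtstq lower to plain IR with no NEON intrinsic call: a bitwise and,
// a compare-not-equal against zero, and a sign-extension of the <N x i1>
// mask back to the element type.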
// CHECK-LABEL: @test_vtst_s8(
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
  return vtst_s8(a, b);
}

// CHECK-LABEL: @test_vtst_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
  return vtst_s16(a, b);
}

// CHECK-LABEL: @test_vtst_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <2 x i32> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
  return vtst_s32(a, b);
}

// CHECK-LABEL: @test_vtst_u8(
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
  return vtst_u8(a, b);
}

// CHECK-LABEL: @test_vtst_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
  return vtst_u16(a, b);
}

// CHECK-LABEL: @test_vtst_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <2 x i32> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
  return vtst_u32(a, b);
}

// CHECK-LABEL: @test_vtst_p8(
// CHECK:   [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
  return vtst_p8(a, b);
}

// CHECK-LABEL: @test_vtst_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
  return vtst_p16(a, b);
}

// CHECK-LABEL: @test_vtstq_s8(
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
  return vtstq_s8(a, b);
}

// CHECK-LABEL: @test_vtstq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
  return vtstq_s16(a, b);
}

// CHECK-LABEL: @test_vtstq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i32> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
  return vtstq_s32(a, b);
}

// CHECK-LABEL: @test_vtstq_u8(
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
  return vtstq_u8(a, b);
}

// CHECK-LABEL: @test_vtstq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
  return vtstq_u16(a, b);
}

// CHECK-LABEL: @test_vtstq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <4 x i32> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
  return vtstq_u32(a, b);
}

// CHECK-LABEL: @test_vtstq_p8(
// CHECK:   [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK:   ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
  return vtstq_p8(a, b);
}

// CHECK-LABEL: @test_vtstq_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
  return vtstq_p16(a, b);
}

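// vuzp/vuzpq de-interleave their inputs: as with vtrn, the two result
// vectors come back through an sret struct, with one shuffle collecting the
// even-indexed elements and a second collecting the odd-indexed elements.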
20648 // CHECK: @test_vuzp_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20649 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
20650 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20651 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20652 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !57
20653 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20654 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20655 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !57
20656 // CHECK:   ret void
test_vuzp_s8(int8x8_t a,int8x8_t b)20657 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
20658   return vuzp_s8(a, b);
20659 }
20660 
20661 // CHECK: @test_vuzp_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20662 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
20663 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20664 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20665 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20666 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20667 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !60
20668 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20669 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20670 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !60
20671 // CHECK:   ret void
test_vuzp_s16(int16x4_t a,int16x4_t b)20672 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
20673   return vuzp_s16(a, b);
20674 }
20675 
20676 // CHECK: @test_vuzp_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20677 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20678 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20679 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20680 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20681 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20682 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !63
20683 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20684 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20685 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !63
20686 // CHECK:   ret void
test_vuzp_s32(int32x2_t a,int32x2_t b)20687 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
20688   return vuzp_s32(a, b);
20689 }
20690 
20691 // CHECK: @test_vuzp_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20692 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
20693 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20694 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20695 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !66
20696 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20697 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20698 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !66
20699 // CHECK:   ret void
test_vuzp_u8(uint8x8_t a,uint8x8_t b)20700 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
20701   return vuzp_u8(a, b);
20702 }
20703 
20704 // CHECK: @test_vuzp_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20705 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
20706 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20707 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20708 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20709 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20710 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !69
20711 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20712 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20713 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !69
20714 // CHECK:   ret void
test_vuzp_u16(uint16x4_t a,uint16x4_t b)20715 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
20716   return vuzp_u16(a, b);
20717 }
20718 
20719 // CHECK: @test_vuzp_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20720 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
20721 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20722 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20723 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20724 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20725 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !72
20726 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20727 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20728 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !72
20729 // CHECK:   ret void
test_vuzp_u32(uint32x2_t a,uint32x2_t b)20730 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
20731   return vuzp_u32(a, b);
20732 }
20733 
20734 // CHECK: @test_vuzp_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20735 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
20736 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
20737 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
20738 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
20739 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
20740 // CHECK:   store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !75
20741 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
20742 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
20743 // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !75
20744 // CHECK:   ret void
test_vuzp_f32(float32x2_t a,float32x2_t b)20745 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
20746   return vuzp_f32(a, b);
20747 }
20748 
20749 // CHECK: @test_vuzp_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20750 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
20751 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20752 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20753 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !78
20754 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20755 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20756 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !78
20757 // CHECK:   ret void
test_vuzp_p8(poly8x8_t a,poly8x8_t b)20758 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
20759   return vuzp_p8(a, b);
20760 }
20761 
20762 // CHECK: @test_vuzp_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20763 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
20764 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20765 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20766 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20767 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20768 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !81
20769 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20770 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20771 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !81
20772 // CHECK:   ret void
test_vuzp_p16(poly16x4_t a,poly16x4_t b)20773 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
20774   return vuzp_p16(a, b);
20775 }
20776 
// CHECK: @test_vuzpq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !84
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !84
// CHECK:   ret void
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}

// CHECK: @test_vuzpq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !87
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !87
// CHECK:   ret void
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}

// CHECK: @test_vuzpq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !90
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !90
// CHECK:   ret void
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}

// CHECK: @test_vuzpq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !93
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !93
// CHECK:   ret void
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}

// CHECK: @test_vuzpq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !96
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !96
// CHECK:   ret void
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}

// CHECK: @test_vuzpq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !99
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !99
// CHECK:   ret void
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}

// CHECK: @test_vuzpq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK:   store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !102
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !102
// CHECK:   ret void
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
  return vuzpq_f32(a, b);
}

// CHECK: @test_vuzpq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !105
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !105
// CHECK:   ret void
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
  return vuzpq_p8(a, b);
}

// CHECK: @test_vuzpq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !108
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !108
// CHECK:   ret void
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
  return vuzpq_p16(a, b);
}

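// vzip interleaves the lanes of its operands. For the 64-bit d-register
// forms, val[0] zips the low halves and val[1] the high halves; for vzip_s8,
// val[0] is {a0,b0,a1,b1,a2,b2,a3,b3} and val[1] is
// {a4,b4,a5,b5,a6,b6,a7,b7}, matching the shuffle masks in the checked IR.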
// CHECK: @test_vzip_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !111
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !111
// CHECK:   ret void
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
  return vzip_s8(a, b);
}

// CHECK: @test_vzip_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !114
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !114
// CHECK:   ret void
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
  return vzip_s16(a, b);
}

// CHECK: @test_vzip_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !117
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !117
// CHECK:   ret void
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
  return vzip_s32(a, b);
}

// CHECK: @test_vzip_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !120
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !120
// CHECK:   ret void
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
  return vzip_u8(a, b);
}

// CHECK: @test_vzip_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !123
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !123
// CHECK:   ret void
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
  return vzip_u16(a, b);
}

// CHECK: @test_vzip_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !126
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !126
// CHECK:   ret void
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
  return vzip_u32(a, b);
}

// CHECK: @test_vzip_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !129
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !129
// CHECK:   ret void
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
  return vzip_f32(a, b);
}

// CHECK: @test_vzip_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !132
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !132
// CHECK:   ret void
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
  return vzip_p8(a, b);
}

// CHECK: @test_vzip_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !135
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !135
// CHECK:   ret void
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
  return vzip_p16(a, b);
}

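// The q-register vzip variants follow the same pattern on 128-bit vectors:
// val[0] interleaves the low halves of the two operands and val[1] the high
// halves (masks <0,16,1,17,...,7,23> and <8,24,9,25,...,15,31> in the
// 16-lane case).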
// CHECK: @test_vzipq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !138
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !138
// CHECK:   ret void
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
  return vzipq_s8(a, b);
}

// CHECK: @test_vzipq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !141
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !141
// CHECK:   ret void
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
  return vzipq_s16(a, b);
}

// CHECK: @test_vzipq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !144
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !144
// CHECK:   ret void
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
  return vzipq_s32(a, b);
}

// CHECK: @test_vzipq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !147
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !147
// CHECK:   ret void
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
  return vzipq_u8(a, b);
}

// CHECK: @test_vzipq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !150
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !150
// CHECK:   ret void
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
  return vzipq_u16(a, b);
}

// CHECK: @test_vzipq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !153
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !153
// CHECK:   ret void
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
  return vzipq_u32(a, b);
}

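// The floating-point and polynomial vzipq variants use the same lane
// shuffles; only the vector element type differs.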
// CHECK: @test_vzipq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !156
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !156
// CHECK:   ret void
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
  return vzipq_f32(a, b);
}

// CHECK: @test_vzipq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !159
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !159
// CHECK:   ret void
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
  return vzipq_p8(a, b);
}

// CHECK: @test_vzipq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !162
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !162
// CHECK:   ret void
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
  return vzipq_p16(a, b);
}